diff --git a/database/database.mysql b/database/database.mysql
index eea199a22b8eee6bd09904fe59da2981069b6b76..d635f6dc1e8120247a8db3676b18a9f77559ab76 100644
--- a/database/database.mysql
+++ b/database/database.mysql
@@ -562,6 +562,16 @@ CREATE TABLE role (
   UNIQUE KEY name (name)
 ) TYPE=MyISAM;
 
+--
+-- Table structure for table 'search_dataset'
+--
+CREATE TABLE search_dataset (
+  sid int(10) unsigned NOT NULL default '0',
+  type varchar(16) default NULL,
+  data longtext NOT NULL,
+  KEY sid_type (sid, type)
+) TYPE=MyISAM;
+
 --
 -- Table structure for table 'search_index'
 --
@@ -572,9 +582,9 @@ CREATE TABLE search_index (
   type varchar(16) default NULL,
   fromsid int(10) unsigned NOT NULL default '0',
   fromtype varchar(16) default NULL,
-  score int(10) unsigned default NULL,
-  KEY sid (sid),
-  KEY fromsid (fromsid),
+  score float default NULL,
+  KEY sid_type (sid, type),
+  KEY from_sid_type (fromsid, fromtype),
   KEY word (word)
 ) TYPE=MyISAM;
 
@@ -584,7 +594,7 @@ CREATE TABLE search_index (
 
 CREATE TABLE search_total (
   word varchar(50) NOT NULL default '',
-  count int(10) unsigned default NULL,
+  count float default NULL,
   PRIMARY KEY (word)
 ) TYPE=MyISAM;
 
diff --git a/database/database.pgsql b/database/database.pgsql
index c3cf8738d99721fa63d0f7773428f6ae25c5112b..ba2fd0a8fe0fadfdac52623b1a2daae2f5e6eb4e 100644
--- a/database/database.pgsql
+++ b/database/database.pgsql
@@ -571,6 +571,16 @@ CREATE TABLE role (
   UNIQUE (name)
 );
 
+--
+-- Table structure for search_dataset (the index is created separately below)
+--
+CREATE TABLE search_dataset (
+  sid integer NOT NULL default '0',
+  type varchar(16) default NULL,
+  data text NOT NULL default ''
+);
+CREATE INDEX search_dataset_sid_type_idx ON search_dataset(sid, type);
+
 --
 -- Table structure for search_index
 --
@@ -581,10 +591,10 @@ CREATE TABLE search_index (
   type varchar(16) default NULL,
   fromsid integer NOT NULL default '0',
   fromtype varchar(16) default NULL,
-  score integer default NULL
+  score float default NULL
 );
-CREATE INDEX search_index_sid_idx ON search_index(sid);
-CREATE INDEX search_index_fromsid_idx ON search_index(fromsid);
+CREATE INDEX search_index_sid_type_idx ON search_index(sid, type);
+CREATE INDEX search_index_from_sid_type_idx ON search_index(fromsid, fromtype);
 CREATE INDEX search_index_word_idx ON search_index(word);
 
 --
diff --git a/database/updates.inc b/database/updates.inc
index 988cb3a6f22fee8e359f8cbe5286785deb2c1320..2c2c57deae4b346cfb99a61ad0ce1bbdae11ec3a 100644
--- a/database/updates.inc
+++ b/database/updates.inc
@@ -66,7 +66,8 @@
   "2005-08-25" => "update_146",
   "2005-09-07" => "update_147",
   "2005-09-18" => "update_148",
-  "2005-09-27" => "update_149"
+  "2005-09-27" => "update_149",
+  "2005-10-15" => "update_150"
 );
 
 function update_110() {
@@ -846,6 +847,78 @@ function update_149() {
   return $ret;
 }
 
+function update_150() {
+  $ret = array();
+
+  $ret[] = update_sql("DELETE FROM {variable} WHERE name = 'node_cron_last'");
+  $ret[] = update_sql("DELETE FROM {variable} WHERE name = 'minimum_word_size'");
+  $ret[] = update_sql("DELETE FROM {variable} WHERE name = 'remove_short'");
+
+  $ret[] = update_sql("DELETE FROM {node_counter} WHERE nid = 0");
+
+  $ret[] = update_sql('DROP TABLE {search_index}');
+  $ret[] = update_sql('DROP TABLE {search_total}');
+  // Recreate the search tables with the new schema for the active database type.
+  switch ($GLOBALS['db_type']) {
+    case 'mysqli':
+    case 'mysql':
+      $ret[] = update_sql("CREATE TABLE {search_dataset} (
+                           sid int(10) unsigned NOT NULL default '0',
+                           type varchar(16) default NULL,
+                           data longtext NOT NULL,
+                           KEY sid_type (sid, type)
+                           )");
+
+      $ret[] = update_sql("CREATE TABLE {search_index} (
+                           word varchar(50) NOT NULL default '',
+                           sid int(10) unsigned NOT NULL default '0',
+                           type varchar(16) default NULL,
+                           fromsid int(10) unsigned NOT NULL default '0',
+                           fromtype varchar(16) default NULL,
+                           score float default NULL,
+                           KEY sid_type (sid, type),
+                           KEY from_sid_type (fromsid, fromtype),
+                           KEY word (word)
+                           )");
+
+      $ret[] = update_sql("CREATE TABLE {search_total} (
+                           word varchar(50) NOT NULL default '',
+                           count float default NULL,
+                           PRIMARY KEY word (word)
+                           )");
+      break;
+    case 'pgsql':
+      // PostgreSQL has no inline KEY clause, so indexes are created separately.
+      $ret[] = update_sql("CREATE TABLE {search_dataset} (
+                           sid integer NOT NULL default '0',
+                           type varchar(16) default NULL,
+                           data text NOT NULL default ''
+                           )");
+      $ret[] = update_sql("CREATE INDEX search_dataset_sid_type_idx ON {search_dataset}(sid, type)");
+      $ret[] = update_sql("CREATE TABLE {search_index} (
+                           word varchar(50) NOT NULL default '',
+                           sid integer NOT NULL default '0',
+                           type varchar(16) default NULL,
+                           fromsid integer NOT NULL default '0',
+                           fromtype varchar(16) default NULL,
+                           score float default NULL
+                           )");
+      $ret[] = update_sql("CREATE INDEX search_index_sid_type_idx ON {search_index}(sid, type)");
+      $ret[] = update_sql("CREATE INDEX search_index_from_sid_type_idx ON {search_index}(fromsid, fromtype)");
+      $ret[] = update_sql("CREATE INDEX search_index_word_idx ON {search_index}(word)");
+
+      $ret[] = update_sql("CREATE TABLE {search_total} (
+                           word varchar(50) NOT NULL default '',
+                           count float default NULL
+                           )");
+      $ret[] = update_sql("CREATE INDEX search_total_word_idx ON {search_total}(word)");
+      break;
+    default:
+      break;
+  }
+  return $ret;
+}
+
 function update_sql($sql) {
   $edit = $_POST["edit"];
   $result = db_query($sql);
diff --git a/includes/database.mysql.inc b/includes/database.mysql.inc
index d816b6dd88daaf1d2a5224c161171bf5a748aa6f..7b307a3c5f5c31bc52538a51cde9e9e53c52deba 100644
--- a/includes/database.mysql.inc
+++ b/includes/database.mysql.inc
@@ -238,6 +238,50 @@ function db_query_range($query) {
   return _db_query($query);
 }
 
+/**
+ * Runs a SELECT query and stores its results in a temporary table.
+ *
+ * Use this as a substitute for db_query() when the results need to be stored
+ * in a temporary table. Temporary tables exist for the duration of the page
+ * request.
+ * User-supplied arguments to the query should be passed in as separate parameters
+ * so that they can be properly escaped to avoid SQL injection attacks.
+ *
+ * Note that if you need to know how many results were returned, you should do
+ * a SELECT COUNT(*) on the temporary table afterwards. db_num_rows() and
+ * db_affected_rows() do not give consistent results across different database
+ * types in this case.
+ *
+ * @param $query
+ *   A string containing a normal SELECT SQL query.
+ * @param ...
+ *   A variable number of arguments which are substituted into the query using
+ *   printf() syntax. Instead of a variable number of query arguments, you may
+ *   also pass a single array containing the query arguments.
+ * @param $table
+ *   The name of the temporary table to select into, passed as the last
+ *   argument. This name will not be prefixed as there is no risk of collision.
+ * @return
+ *   A database query result resource, or FALSE if the query was not executed
+ *   correctly.
+ */
+function db_query_temporary($query) {
+  $args = func_get_args();
+  $tablename = array_pop($args);
+
+  $query = preg_replace('/^SELECT/i', 'CREATE TEMPORARY TABLE '. $tablename .' SELECT', db_prefix_tables($query));
+  if (count($args) > 1) {
+    // A single array of arguments may be passed instead of a variable list.
+    if (is_array($args[1])) {
+      $args = array_merge(array($query), $args[1]);
+    }
+    $args = array_map('db_escape_string', $args);
+    $args[0] = $query;
+    $query = call_user_func_array('sprintf', $args);
+  }
+  return _db_query($query);
+}
+
 /**
  * Returns a properly formatted Binary Large OBject value.
  *
diff --git a/includes/database.mysqli.inc b/includes/database.mysqli.inc
index b0a5278d07a1e9135e49c9001e7ec17b469127d1..779a4a909d92184886b93caa6d2eff68d2b4bde6 100644
--- a/includes/database.mysqli.inc
+++ b/includes/database.mysqli.inc
@@ -205,6 +205,11 @@ function db_affected_rows() {
  * User-supplied arguments to the query should be passed in as separate parameters
  * so that they can be properly escaped to avoid SQL injection attacks.
  *
+ * Note that if you need to know how many results were returned, you should do
+ * a SELECT COUNT(*) on the temporary table afterwards. db_num_rows() and
+ * db_affected_rows() do not give consistent result across different database
+ * types in this case.
+ *
  * @param $query
  *   A string containing an SQL query.
  * @param ...
@@ -238,6 +243,50 @@ function db_query_range($query) {
   return _db_query($query);
 }
 
+/**
+ * Runs a SELECT query and stores its results in a temporary table.
+ *
+ * Use this as a substitute for db_query() when the results need to be stored
+ * in a temporary table. Temporary tables exist for the duration of the page
+ * request.
+ * User-supplied arguments to the query should be passed in as separate parameters
+ * so that they can be properly escaped to avoid SQL injection attacks.
+ *
+ * Note that if you need to know how many results were returned, you should do
+ * a SELECT COUNT(*) on the temporary table afterwards. db_num_rows() and
+ * db_affected_rows() do not give consistent results across different database
+ * types in this case.
+ *
+ * @param $query
+ *   A string containing a normal SELECT SQL query.
+ * @param ...
+ *   A variable number of arguments which are substituted into the query using
+ *   printf() syntax. Instead of a variable number of query arguments, you may
+ *   also pass a single array containing the query arguments.
+ * @param $table
+ *   The name of the temporary table to select into, passed as the last
+ *   argument. This name will not be prefixed as there is no risk of collision.
+ * @return
+ *   A database query result resource, or FALSE if the query was not executed
+ *   correctly.
+ */
+function db_query_temporary($query) {
+  $args = func_get_args();
+  $tablename = array_pop($args);
+
+  $query = preg_replace('/^SELECT/i', 'CREATE TEMPORARY TABLE '. $tablename .' SELECT', db_prefix_tables($query));
+  if (count($args) > 1) {
+    // A single array of arguments may be passed instead of a variable list.
+    if (is_array($args[1])) {
+      $args = array_merge(array($query), $args[1]);
+    }
+    $args = array_map('db_escape_string', $args);
+    $args[0] = $query;
+    $query = call_user_func_array('sprintf', $args);
+  }
+  return _db_query($query);
+}
+
 /**
  * Returns a properly formatted Binary Large OBject value.
  *
diff --git a/includes/database.pgsql.inc b/includes/database.pgsql.inc
index 33c960a20221651c78b73bec143026c83a1e2a3b..4cfa74f829cda6a4a547c475b407c28f83fdda1b 100644
--- a/includes/database.pgsql.inc
+++ b/includes/database.pgsql.inc
@@ -223,6 +223,50 @@ function db_query_range($query) {
   return _db_query($query);
 }
 
+/**
+ * Runs a SELECT query and stores its results in a temporary table.
+ *
+ * Use this as a substitute for db_query() when the results need to be stored
+ * in a temporary table. Temporary tables exist for the duration of the page
+ * request.
+ * User-supplied arguments to the query should be passed in as separate parameters
+ * so that they can be properly escaped to avoid SQL injection attacks.
+ *
+ * Note that if you need to know how many results were returned, you should do
+ * a SELECT COUNT(*) on the temporary table afterwards. db_num_rows() and
+ * db_affected_rows() do not give consistent results across different database
+ * types in this case.
+ *
+ * @param $query
+ *   A string containing a normal SELECT SQL query.
+ * @param ...
+ *   A variable number of arguments which are substituted into the query using
+ *   printf() syntax. Instead of a variable number of query arguments, you may
+ *   also pass a single array containing the query arguments.
+ * @param $table
+ *   The name of the temporary table to select into, passed as the last
+ *   argument. This name will not be prefixed as there is no risk of collision.
+ * @return
+ *   A database query result resource, or FALSE if the query was not executed
+ *   correctly.
+ */
+function db_query_temporary($query) {
+  $args = func_get_args();
+  $tablename = array_pop($args);
+  // The replacement must keep the SELECT keyword: "CREATE TEMPORARY TABLE t AS SELECT ...".
+  $query = preg_replace('/^SELECT/i', 'CREATE TEMPORARY TABLE '. $tablename .' AS SELECT', db_prefix_tables($query));
+  if (count($args) > 1) {
+    // A single array of arguments may be passed instead of a variable list.
+    if (is_array($args[1])) {
+      $args = array_merge(array($query), $args[1]);
+    }
+    $args = array_map('db_escape_string', $args);
+    $args[0] = $query;
+    $query = call_user_func_array('sprintf', $args);
+  }
+  return _db_query($query);
+}
+
 /**
  * Returns a properly formatted Binary Large OBject value.
  *
diff --git a/misc/drupal.css b/misc/drupal.css
index 7757cfd129d462096a739facc419ed579e59f145..bc4de087672cac36071a6bd8cdf559bf6a982d17 100644
--- a/misc/drupal.css
+++ b/misc/drupal.css
@@ -445,6 +445,13 @@ img.screenshot {
 .search-results .search-info {
   font-size: 0.85em;
 }
+.search-advanced .criterium {
+  float: left;
+  margin-right: 2em;
+}
+.search-advanced .action {
+  clear: left;
+}
 #tracker td.replies {
   text-align: center;
 }
diff --git a/modules/node.module b/modules/node.module
index da0f2d9e18a893dfc45fbd727f56b59b250de692..9187583e8317998f07a70a90da0b16d7114b1e67 100644
--- a/modules/node.module
+++ b/modules/node.module
@@ -597,17 +597,112 @@ function node_search($op = 'search', $keys = null) {
   switch ($op) {
     case 'name':
       return t('content');
+
     case 'reset':
       variable_del('node_cron_last');
       return;
+
     case 'status':
       $last = variable_get('node_cron_last', 0);
       $total = db_result(db_query('SELECT COUNT(*) FROM {node} WHERE status = 1 AND moderate = 0'));
       $remaining = db_result(db_query('SELECT COUNT(*) FROM {node} n LEFT JOIN {node_comment_statistics} c ON n.nid = c.nid WHERE n.status = 1 AND n.moderate = 0 AND (n.created > %d OR n.changed > %d OR c.last_comment_timestamp > %d)', $last, $last, $last));
       return array('remaining' => $remaining, 'total' => $total);
+
+    case 'admin':
+      $form = array();
+      // Output form for defining rank factor weights.
+      $form['content_ranking'] = array('#type' => 'fieldset', '#title' => t('Content ranking'));
+      $form['content_ranking']['#theme'] = 'node_search_admin';
+      $form['content_ranking']['info'] = array('#type' => 'markup', '#value' => '<em>'. t('The following numbers control which properties the content search should favor when ordering the results. Higher numbers mean more influence. Zero means the property is ignored.') .'</em>');
+
+      $ranking = array('node_rank_relevance' => t('Keyword relevance'),
+                       'node_rank_recent' => t('Recently posted'));
+      if (module_exist('comment')) {
+        $ranking['node_rank_comments'] = t('Number of comments');
+      }
+      if (module_exist('statistics') && variable_get('statistics_count_content_views', 0)) {
+        $ranking['node_rank_views'] = t('Number of views');
+      }
+
+      // Note: reversed to reflect that higher number = higher ranking.
+      $options = drupal_map_assoc(range(0, 10));
+      foreach ($ranking as $var => $title) {
+        $form['content_ranking']['factors'][$var] = array('#title' => $title, '#type' => 'select', '#options' => $options, '#default_value' => variable_get($var, 5));
+      }
+      return $form;
+
     case 'search':
-      list($join, $where) = _db_rewrite_sql();
-      $find = do_search($keys, 'node', 'INNER JOIN {node} n ON n.nid = i.sid '. $join .' INNER JOIN {users} u ON n.uid = u.uid', 'n.status = 1'. (empty($where) ? '' : ' AND '. $where));
+      // Build matching conditions
+      list($join1, $where1) = _db_rewrite_sql();
+      $arguments1 = array();
+      $conditions1 = 'n.status = 1';
+
+      if ($type = search_query_extract($keys, 'type')) {
+        $types = array();
+        foreach (explode(',', $type) as $t) {
+          $types[] = "n.type = '%s'";
+          $arguments1[] = $t;
+        }
+        $conditions1 .= ' AND ('. implode(' OR ', $types) .')';
+        $keys = search_query_insert($keys, 'type');
+      }
+
+      if ($category = search_query_extract($keys, 'category')) {
+        $categories = array();
+        foreach (explode(',', $category) as $c) {
+          $categories[] = "tn.tid = %d";
+          $arguments1[] = $c;
+        }
+        $conditions1 .= ' AND ('. implode(' OR ', $categories) .')';
+        $join1 .= ' INNER JOIN {term_node} tn ON n.nid = tn.nid';
+        $keys = search_query_insert($keys, 'category');
+      }
+
+      // Build ranking expression (we try to map each parameter to a
+      // uniform distribution in the range 0..1).
+      $ranking = array();
+      $arguments2 = array();
+      $join2 = '';
+      // Used to avoid joining on node_comment_statistics twice
+      $stats_join = false;
+      if ($weight = (int)variable_get('node_rank_relevance', 5)) {
+        // Average relevance values hover around 0.15
+        $ranking[] = '%d * i.relevance';
+        $arguments2[] = $weight;
+      }
+      if ($weight = (int)variable_get('node_rank_recent', 5)) {
+        // Exponential decay with half-life of 6 months, starting at last indexed node
+        $ranking[] = '%d * POW(2, (GREATEST(n.created, n.changed, c.last_comment_timestamp) - %d) * 6.43e-8)';
+        $arguments2[] = $weight;
+        $arguments2[] = (int)variable_get('node_cron_last', 0);
+        $join2 .= ' INNER JOIN {node} n ON n.nid = i.sid LEFT JOIN {node_comment_statistics} c ON c.nid = i.sid';
+        $stats_join = true;
+      }
+      if (module_exist('comment') && $weight = (int)variable_get('node_rank_comments', 5)) {
+        // Inverse law that maps the highest reply count on the site to 1 and 0 to 0.
+        $scale = variable_get('node_cron_comments_scale', 0.0);
+        $ranking[] = '%d * (2.0 - 2.0 / (1.0 + c.comment_count * %f))';
+        $arguments2[] = $weight;
+        $arguments2[] = $scale;
+        if (!$stats_join) {
+          $join2 .= ' LEFT JOIN {node_comment_statistics} c ON c.nid = i.sid';
+        }
+      }
+      if (module_exist('statistics') && variable_get('statistics_count_content_views', 0) &&
+          $weight = (int)variable_get('node_rank_views', 5)) {
+        // Inverse law that maps the highest view count on the site to 1 and 0 to 0.
+        $scale = variable_get('node_cron_views_scale', 0.0);
+        $ranking[] = '%d * (2.0 - 2.0 / (1.0 + nc.totalcount * %f))';
+        array_push($arguments2, $weight, $scale);
+        // Join on i.sid: $join2 only defines alias n when the recency factor is enabled.
+        $join2 .= ' LEFT JOIN {node_counter} nc ON nc.nid = i.sid';
+      }
+      $select2 = (count($ranking) ? implode(' + ', $ranking) : 'i.relevance') . ' AS score';
+
+      // Do search
+      $find = do_search($keys, 'node', 'INNER JOIN {node} n ON n.nid = i.sid '. $join1 .' INNER JOIN {users} u ON n.uid = u.uid', $conditions1 . (empty($where1) ? '' : ' AND '. $where1), $arguments1, $select2, $join2, $arguments2);
+
+      // Load results
       $results = array();
       foreach ($find as $item) {
         $node = node_load($item);
@@ -622,19 +717,86 @@ function node_search($op = 'search', $keys = null) {
         // Allow modules to change $node->body before viewing.
         node_invoke_nodeapi($node, 'view', false, false);
 
+        // Fetch comments for snippet
+        $node->body .= module_invoke('comment', 'nodeapi', $node, 'update index');
+
         $extra = node_invoke_nodeapi($node, 'search result');
         $results[] = array('link' => url('node/'. $item),
                            'type' => node_get_name($node),
                            'title' => $node->title,
                            'user' => theme('username', $node),
                            'date' => $node->changed,
+                           'node' => $node,
                            'extra' => $extra,
                            'snippet' => search_excerpt($keys, $node->body));
       }
       return $results;
+
+    case 'form':
+      $form = array();
+
+      // Keyword boxes
+      $form['advanced'] = array('#type' => 'fieldset', '#title' => t('Advanced search'), '#collapsible' => true, '#collapsed' => true, '#attributes' => array('class' => 'search-advanced'));
+
+      $form['advanced']['keywords'] = array('#type' => 'markup', '#prefix' => '<div class="criterium">', '#suffix' => '</div>');
+      $form['advanced']['keywords']['or'] = array('#type' => 'textfield', '#title' => t('Containing any of the words'), '#size' => 30, '#maxlength' => 255);
+      $form['advanced']['keywords']['phrase'] = array('#type' => 'textfield', '#title' => t('Containing the phrase'), '#size' => 30, '#maxlength' => 255);
+      $form['advanced']['keywords']['negative'] = array('#type' => 'textfield', '#title' => t('Containing none of the words'), '#size' => 30, '#maxlength' => 255);
+
+      // Taxonomy box
+      if ($taxonomy = module_invoke('taxonomy', 'form_all')) {
+        $form['advanced']['category'] = array('#type' => 'select', '#title' => t('Only in the category'), '#prefix' => '<div class="criterium">', '#suffix' => '</div>', '#options' => $taxonomy, '#extra' => 'size="10"', '#multiple' => true);
+      }
+
+      // Node types
+      $types = node_get_types();
+      $form['advanced']['type'] = array('#type' => 'checkboxes', '#title' => t('Only of the type'), '#prefix' => '<div class="criterium">', '#suffix' => '</div>', '#options' => $types, '#multiple' => true);
+      $form['advanced']['submit'] = array('#type' => 'submit', '#value' => t('Advanced Search'), '#prefix' => '<div class="action">', '#suffix' => '</div>');
+      return $form;
+
+    case 'post':
+      // Insert extra restrictions into the search keywords string.
+      $edit = &$_POST['edit'];
+      if (is_array($edit['type'])) {
+        $keys = search_query_insert($keys, 'type', implode(',', array_keys($edit['type'])));
+      }
+      if (is_array($edit['category'])) {
+        $keys = search_query_insert($keys, 'category', implode(',', $edit['category']));
+      }
+      if ($edit['or'] != '') {
+        if (preg_match_all('/ ("[^"]+"|[^" ]+)/i', ' '. $edit['or'], $matches)) {
+          $keys = $keys .' '. implode(' OR ', $matches[1]);
+        }
+      }
+      if ($edit['negative'] != '') {
+        if (preg_match_all('/ ("[^"]+"|[^" ]+)/i', ' '. $edit['negative'], $matches)) {
+          $keys = $keys .' -'. implode(' -', $matches[1]);
+        }
+      }
+      if ($edit['phrase'] != '') {
+        $keys .= ' "'. str_replace('"', ' ', $edit['phrase']) .'"';
+      }
+      return trim($keys);
   }
 }
 
+/**
+ * Theme the content ranking fieldset: intro text, then a Factor/Weight table.
+ */
+function theme_node_search_admin($form) {
+  $output = form_render($form['info']);
+  $header = array(t('Factor'), t('Weight'));
+  $rows = array();
+  foreach (element_children($form['factors']) as $key) {
+    $title = $form['factors'][$key]['#title'];
+    unset($form['factors'][$key]['#title']);
+    $rows[] = array($title, form_render($form['factors'][$key]));
+  }
+  $output .= theme('table', $header, $rows);
+  $output .= form_render($form);
+  return $output;
+}
+
 /**
  * Menu callback; presents general node configuration options.
  */
@@ -1864,6 +2026,10 @@ function node_update_index() {
   $last = variable_get('node_cron_last', 0);
   $limit = (int)variable_get('search_cron_limit', 100);
 
+  // Store the maximum possible comments per thread (used for ranking by reply count)
+  variable_set('node_cron_comments_scale', 1.0 / max(1, db_result(db_query('SELECT MAX(comment_count) FROM {node_comment_statistics}'))));
+  variable_set('node_cron_views_scale', 1.0 / max(1, db_result(db_query('SELECT MAX(totalcount) FROM {node_counter}'))));
+
   $result = db_query_range('SELECT n.nid, c.last_comment_timestamp FROM {node} n LEFT JOIN {node_comment_statistics} c ON n.nid = c.nid WHERE n.status = 1 AND n.moderate = 0 AND (n.created > %d OR n.changed > %d OR c.last_comment_timestamp > %d) ORDER BY GREATEST(n.created, n.changed, c.last_comment_timestamp) ASC', $last, $last, $last, 0, $limit);
 
   while ($node = db_fetch_object($result)) {
diff --git a/modules/node/node.module b/modules/node/node.module
index da0f2d9e18a893dfc45fbd727f56b59b250de692..9187583e8317998f07a70a90da0b16d7114b1e67 100644
--- a/modules/node/node.module
+++ b/modules/node/node.module
@@ -597,17 +597,112 @@ function node_search($op = 'search', $keys = null) {
   switch ($op) {
     case 'name':
       return t('content');
+
     case 'reset':
       variable_del('node_cron_last');
       return;
+
     case 'status':
       $last = variable_get('node_cron_last', 0);
       $total = db_result(db_query('SELECT COUNT(*) FROM {node} WHERE status = 1 AND moderate = 0'));
       $remaining = db_result(db_query('SELECT COUNT(*) FROM {node} n LEFT JOIN {node_comment_statistics} c ON n.nid = c.nid WHERE n.status = 1 AND n.moderate = 0 AND (n.created > %d OR n.changed > %d OR c.last_comment_timestamp > %d)', $last, $last, $last));
       return array('remaining' => $remaining, 'total' => $total);
+
+    case 'admin':
+      $form = array();
+      // Output form for defining rank factor weights.
+      $form['content_ranking'] = array('#type' => 'fieldset', '#title' => t('Content ranking'));
+      $form['content_ranking']['#theme'] = 'node_search_admin';
+      $form['content_ranking']['info'] = array('#type' => 'markup', '#value' => '<em>'. t('The following numbers control which properties the content search should favor when ordering the results. Higher numbers mean more influence. Zero means the property is ignored.') .'</em>');
+
+      $ranking = array('node_rank_relevance' => t('Keyword relevance'),
+                       'node_rank_recent' => t('Recently posted'));
+      if (module_exist('comment')) {
+        $ranking['node_rank_comments'] = t('Number of comments');
+      }
+      if (module_exist('statistics') && variable_get('statistics_count_content_views', 0)) {
+        $ranking['node_rank_views'] = t('Number of views');
+      }
+
+      // Note: reversed to reflect that higher number = higher ranking.
+      $options = drupal_map_assoc(range(0, 10));
+      foreach ($ranking as $var => $title) {
+        $form['content_ranking']['factors'][$var] = array('#title' => $title, '#type' => 'select', '#options' => $options, '#default_value' => variable_get($var, 5));
+      }
+      return $form;
+
     case 'search':
-      list($join, $where) = _db_rewrite_sql();
-      $find = do_search($keys, 'node', 'INNER JOIN {node} n ON n.nid = i.sid '. $join .' INNER JOIN {users} u ON n.uid = u.uid', 'n.status = 1'. (empty($where) ? '' : ' AND '. $where));
+      // Build matching conditions
+      list($join1, $where1) = _db_rewrite_sql();
+      $arguments1 = array();
+      $conditions1 = 'n.status = 1';
+
+      if ($type = search_query_extract($keys, 'type')) {
+        $types = array();
+        foreach (explode(',', $type) as $t) {
+          $types[] = "n.type = '%s'";
+          $arguments1[] = $t;
+        }
+        $conditions1 .= ' AND ('. implode(' OR ', $types) .')';
+        $keys = search_query_insert($keys, 'type');
+      }
+
+      if ($category = search_query_extract($keys, 'category')) {
+        $categories = array();
+        foreach (explode(',', $category) as $c) {
+          $categories[] = "tn.tid = %d";
+          $arguments1[] = $c;
+        }
+        $conditions1 .= ' AND ('. implode(' OR ', $categories) .')';
+        $join1 .= ' INNER JOIN {term_node} tn ON n.nid = tn.nid';
+        $keys = search_query_insert($keys, 'category');
+      }
+
+      // Build ranking expression (we try to map each parameter to a
+      // uniform distribution in the range 0..1).
+      $ranking = array();
+      $arguments2 = array();
+      $join2 = '';
+      // Used to avoid joining on node_comment_statistics twice
+      $stats_join = false;
+      if ($weight = (int)variable_get('node_rank_relevance', 5)) {
+        // Average relevance values hover around 0.15
+        $ranking[] = '%d * i.relevance';
+        $arguments2[] = $weight;
+      }
+      if ($weight = (int)variable_get('node_rank_recent', 5)) {
+        // Exponential decay with half-life of 6 months, starting at last indexed node
+        $ranking[] = '%d * POW(2, (GREATEST(n.created, n.changed, c.last_comment_timestamp) - %d) * 6.43e-8)';
+        $arguments2[] = $weight;
+        $arguments2[] = (int)variable_get('node_cron_last', 0);
+        $join2 .= ' INNER JOIN {node} n ON n.nid = i.sid LEFT JOIN {node_comment_statistics} c ON c.nid = i.sid';
+        $stats_join = true;
+      }
+      if (module_exist('comment') && $weight = (int)variable_get('node_rank_comments', 5)) {
+        // Inverse law that maps the highest reply count on the site to 1 and 0 to 0.
+        $scale = variable_get('node_cron_comments_scale', 0.0);
+        $ranking[] = '%d * (2.0 - 2.0 / (1.0 + c.comment_count * %f))';
+        $arguments2[] = $weight;
+        $arguments2[] = $scale;
+        if (!$stats_join) {
+          $join2 .= ' LEFT JOIN {node_comment_statistics} c ON c.nid = i.sid';
+        }
+      }
+      if (module_exist('statistics') && variable_get('statistics_count_content_views', 0) &&
+          $weight = (int)variable_get('node_rank_views', 5)) {
+        // Inverse law that maps the highest view count on the site to 1 and 0 to 0.
+        $scale = variable_get('node_cron_views_scale', 0.0);
+        $ranking[] = '%d * (2.0 - 2.0 / (1.0 + nc.totalcount * %f))';
+        array_push($arguments2, $weight, $scale);
+        // Join on i.sid: $join2 only defines alias n when the recency factor is enabled.
+        $join2 .= ' LEFT JOIN {node_counter} nc ON nc.nid = i.sid';
+      }
+      $select2 = (count($ranking) ? implode(' + ', $ranking) : 'i.relevance') . ' AS score';
+
+      // Do search
+      $find = do_search($keys, 'node', 'INNER JOIN {node} n ON n.nid = i.sid '. $join1 .' INNER JOIN {users} u ON n.uid = u.uid', $conditions1 . (empty($where1) ? '' : ' AND '. $where1), $arguments1, $select2, $join2, $arguments2);
+
+      // Load results
       $results = array();
       foreach ($find as $item) {
         $node = node_load($item);
@@ -622,19 +717,86 @@ function node_search($op = 'search', $keys = null) {
         // Allow modules to change $node->body before viewing.
         node_invoke_nodeapi($node, 'view', false, false);
 
+        // Fetch comments for snippet
+        $node->body .= module_invoke('comment', 'nodeapi', $node, 'update index');
+
         $extra = node_invoke_nodeapi($node, 'search result');
         $results[] = array('link' => url('node/'. $item),
                            'type' => node_get_name($node),
                            'title' => $node->title,
                            'user' => theme('username', $node),
                            'date' => $node->changed,
+                           'node' => $node,
                            'extra' => $extra,
                            'snippet' => search_excerpt($keys, $node->body));
       }
       return $results;
+
+    case 'form':
+      $form = array();
+
+      // Keyword boxes
+      $form['advanced'] = array('#type' => 'fieldset', '#title' => t('Advanced search'), '#collapsible' => true, '#collapsed' => true, '#attributes' => array('class' => 'search-advanced'));
+
+      $form['advanced']['keywords'] = array('#type' => 'markup', '#prefix' => '<div class="criterium">', '#suffix' => '</div>');
+      $form['advanced']['keywords']['or'] = array('#type' => 'textfield', '#title' => t('Containing any of the words'), '#size' => 30, '#maxlength' => 255);
+      $form['advanced']['keywords']['phrase'] = array('#type' => 'textfield', '#title' => t('Containing the phrase'), '#size' => 30, '#maxlength' => 255);
+      $form['advanced']['keywords']['negative'] = array('#type' => 'textfield', '#title' => t('Containing none of the words'), '#size' => 30, '#maxlength' => 255);
+
+      // Taxonomy box
+      if ($taxonomy = module_invoke('taxonomy', 'form_all')) {
+        $form['advanced']['category'] = array('#type' => 'select', '#title' => t('Only in the category'), '#prefix' => '<div class="criterium">', '#suffix' => '</div>', '#options' => $taxonomy, '#extra' => 'size="10"', '#multiple' => true);
+      }
+
+      // Node types
+      $types = node_get_types();
+      $form['advanced']['type'] = array('#type' => 'checkboxes', '#title' => t('Only of the type'), '#prefix' => '<div class="criterium">', '#suffix' => '</div>', '#options' => $types, '#multiple' => true);
+      $form['advanced']['submit'] = array('#type' => 'submit', '#value' => t('Advanced Search'), '#prefix' => '<div class="action">', '#suffix' => '</div>');
+      return $form;
+
+    case 'post':
+      // Insert extra restrictions into the search keywords string.
+      $edit = &$_POST['edit'];
+      if (is_array($edit['type'])) {
+        $keys = search_query_insert($keys, 'type', implode(',', array_keys($edit['type'])));
+      }
+      if (is_array($edit['category'])) {
+        $keys = search_query_insert($keys, 'category', implode(',', $edit['category']));
+      }
+      if ($edit['or'] != '') {
+        if (preg_match_all('/ ("[^"]+"|[^" ]+)/i', ' '. $edit['or'], $matches)) {
+          $keys = $keys .' '. implode(' OR ', $matches[1]);
+        }
+      }
+      if ($edit['negative'] != '') {
+        if (preg_match_all('/ ("[^"]+"|[^" ]+)/i', ' '. $edit['negative'], $matches)) {
+          $keys = $keys .' -'. implode(' -', $matches[1]);
+        }
+      }
+      if ($edit['phrase'] != '') {
+        $keys .= ' "'. str_replace('"', ' ', $edit['phrase']) .'"';
+      }
+      return trim($keys);
   }
 }
 
+function theme_node_search_admin($form) {
+  // Renders $form['info'], then a Factor/Weight table with one row per child
+  // of $form['factors'], and finally appends form_render($form).
+  $output = form_render($form['info']);
+  $header = array(t('Factor'), t('Weight'));
+  $rows = array(); // Defined even when $form['factors'] has no children.
+  foreach (element_children($form['factors']) as $key) {
+    $title = $form['factors'][$key]['#title'];
+    // Remove the title before rendering; it already fills the first column.
+    unset($form['factors'][$key]['#title']);
+    $rows[] = array($title, form_render($form['factors'][$key]));
+  }
+  $output .= theme('table', $header, $rows);
+  $output .= form_render($form);
+  return $output;
+}
+
 /**
  * Menu callback; presents general node configuration options.
  */
@@ -1864,6 +2026,10 @@ function node_update_index() {
   $last = variable_get('node_cron_last', 0);
   $limit = (int)variable_get('search_cron_limit', 100);
 
+  // Store the maximum possible comments per thread (used for ranking by reply count)
+  variable_set('node_cron_comments_scale', 1.0 / max(1, db_result(db_query('SELECT MAX(comment_count) FROM {node_comment_statistics}'))));
+  variable_set('node_cron_views_scale', 1.0 / max(1, db_result(db_query('SELECT MAX(totalcount) FROM {node_counter}'))));
+
   $result = db_query_range('SELECT n.nid, c.last_comment_timestamp FROM {node} n LEFT JOIN {node_comment_statistics} c ON n.nid = c.nid WHERE n.status = 1 AND n.moderate = 0 AND (n.created > %d OR n.changed > %d OR c.last_comment_timestamp > %d) ORDER BY GREATEST(n.created, n.changed, c.last_comment_timestamp) ASC', $last, $last, $last, 0, $limit);
 
   while ($node = db_fetch_object($result)) {
diff --git a/modules/search.module b/modules/search.module
index adc020eb57a808c68844e2367bd830aa78942029..e26f7045e2fba386a4718c57e3c2e5cf50b4ad08 100644
--- a/modules/search.module
+++ b/modules/search.module
@@ -15,32 +15,80 @@
  * Lu     Letter, Uppercase
  * Ll     Letter, Lowercase
  * Lt     Letter, Titlecase
- * Lm     Letter, Modifier
  * Lo     Letter, Other
- * Mn     Mark, Nonspacing
- * Mc     Mark, Spacing Combining
  * Nd     Number, Decimal Digit
- * Nl     Number, Letter
  * No     Number, Other
- * Sm     Symbol, Math
- * Sc     Symbol, Currency
- * Sk     Symbol, Modifier
- * So     Symbol, Other
- *
- * All character classes not in the list above (enclosing marks, punctuation, control codes and spacers):
- * 'Me', 'Pc', 'Pd', 'Ps', 'Pe', 'Pi', 'Pf', 'Po', 'Zs', 'Zl', 'Zp', 'Cc', 'Cf', 'Cs', 'Co'
  */
-define('PREG_CLASS_SEARCH_EXCLUDE', '\x{0}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}\x{5f}\x{7b}\x{7d}\x{7f}-\x{a1}\x{ab}\x{ad}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{488}\x{489}\x{55a}-\x{55f}\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{600}-\x{603}\x{60c}\x{60d}\x{61b}\x{61f}\x{66a}-\x{66d}\x{6d4}\x{6dd}\x{6de}\x{700}-\x{70d}\x{70f}\x{964}\x{965}\x{970}\x{df4}\x{e4f}\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}\x{1361}-\x{1368}\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}\x{1736}\x{17b4}\x{17b5}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{180e}\x{1944}\x{1945}\x{2000}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}\x{205f}-\x{2063}\x{206a}-\x{206f}\x{207d}\x{207e}\x{208d}\x{208e}\x{20dd}-\x{20e0}\x{20e2}-\x{20e4}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}\x{3000}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}\x{30fb}\x{d800}\x{db7f}\x{db80}\x{dbff}\x{dc00}\x{dfff}\x{e000}\x{f8ff}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}\x{fe6a}\x{fe6b}\x{feff}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-\x{ff65}\x{fff9}-\x{fffb}\x{10100}\x{10101}\x{1039f}\x{1d173}-\x{1d17a}\x{e0001}\x{e0020}-\x{e007f}\x{f0000}\x{ffffd}\x{100000}');
+define('PREG_CLASS_SEARCH_EXCLUDE',
+'\x{0}-\x{2f}\x{3a}-\x{40}\x{5b}-\x{60}\x{7b}-\x{bf}\x{d7}\x{f7}\x{2b0}-'.
+'\x{385}\x{387}\x{3f6}\x{482}-\x{489}\x{559}-\x{55f}\x{589}-\x{5c7}\x{5f3}-'.
+'\x{61f}\x{640}\x{64b}-\x{65e}\x{66a}-\x{66d}\x{670}\x{6d4}\x{6d6}-\x{6ed}'.
+'\x{6fd}\x{6fe}\x{700}-\x{70f}\x{711}\x{730}-\x{74a}\x{7a6}-\x{7b0}\x{901}-'.
+'\x{903}\x{93c}\x{93e}-\x{94d}\x{951}-\x{954}\x{962}-\x{965}\x{970}\x{981}-'.
+'\x{983}\x{9bc}\x{9be}-\x{9cd}\x{9d7}\x{9e2}\x{9e3}\x{9f2}-\x{a03}\x{a3c}-'.
+'\x{a4d}\x{a70}\x{a71}\x{a81}-\x{a83}\x{abc}\x{abe}-\x{acd}\x{ae2}\x{ae3}'.
+'\x{af1}-\x{b03}\x{b3c}\x{b3e}-\x{b57}\x{b70}\x{b82}\x{bbe}-\x{bd7}\x{bf0}-'.
+'\x{c03}\x{c3e}-\x{c56}\x{c82}\x{c83}\x{cbc}\x{cbe}-\x{cd6}\x{d02}\x{d03}'.
+'\x{d3e}-\x{d57}\x{d82}\x{d83}\x{dca}-\x{df4}\x{e31}\x{e34}-\x{e3f}\x{e46}-'.
+'\x{e4f}\x{e5a}\x{e5b}\x{eb1}\x{eb4}-\x{ebc}\x{ec6}-\x{ecd}\x{f01}-\x{f1f}'.
+'\x{f2a}-\x{f3f}\x{f71}-\x{f87}\x{f90}-\x{fd1}\x{102c}-\x{1039}\x{104a}-'.
+'\x{104f}\x{1056}-\x{1059}\x{10fb}\x{10fc}\x{135f}-\x{137c}\x{1390}-\x{1399}'.
+'\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16f0}\x{1712}-\x{1714}'.
+'\x{1732}-\x{1736}\x{1752}\x{1753}\x{1772}\x{1773}\x{17b4}-\x{17db}\x{17dd}'.
+'\x{17f0}-\x{180e}\x{1843}\x{18a9}\x{1920}-\x{1945}\x{19b0}-\x{19c0}\x{19c8}'.
+'\x{19c9}\x{19de}-\x{19ff}\x{1a17}-\x{1a1f}\x{1d2c}-\x{1d61}\x{1d78}\x{1d9b}-'.
+'\x{1dc3}\x{1fbd}\x{1fbf}-\x{1fc1}\x{1fcd}-\x{1fcf}\x{1fdd}-\x{1fdf}\x{1fed}-'.
+'\x{1fef}\x{1ffd}-\x{2070}\x{2074}-\x{207e}\x{2080}-\x{2101}\x{2103}-\x{2106}'.
+'\x{2108}\x{2109}\x{2114}\x{2116}-\x{2118}\x{211e}-\x{2123}\x{2125}\x{2127}'.
+'\x{2129}\x{212e}\x{2132}\x{213a}\x{213b}\x{2140}-\x{2144}\x{214a}-\x{2b13}'.
+'\x{2ce5}-\x{2cff}\x{2d6f}\x{2e00}-\x{3005}\x{3007}-\x{303b}\x{303d}-\x{303f}'.
+'\x{3099}-\x{309e}\x{30a0}\x{30fb}-\x{30fe}\x{3190}-\x{319f}\x{31c0}-\x{31cf}'.
+'\x{3200}-\x{33ff}\x{4dc0}-\x{4dff}\x{a015}\x{a490}-\x{a716}\x{a802}\x{a806}'.
+'\x{a80b}\x{a823}-\x{a82b}\x{d800}-\x{f8ff}\x{fb1e}\x{fb29}\x{fd3e}\x{fd3f}'.
+'\x{fdfc}-\x{fe6b}\x{feff}-\x{ff0f}\x{ff1a}-\x{ff20}\x{ff3b}-\x{ff40}\x{ff5b}-'.
+'\x{ff65}\x{ff70}\x{ff9e}\x{ff9f}\x{ffe0}-\x{fffd}');
 
 /**
  * Matches all 'N' Unicode character classes (numbers)
  */
-define('PREG_CLASS_NUMBERS', '\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}\x{10107}-\x{10133}\x{10320}-\x{10323}\x{1034a}\x{104a0}-\x{104a9}\x{1d7ce}-\x{1d7ff}');
+define('PREG_CLASS_NUMBERS',
+'\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}'.
+'\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}'.
+'\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}'.
+'\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-'.
+'\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}'.
+'\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}'.
+'\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}'.
+'\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-'.
+'\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}');
 
 /**
  * Matches all 'P' Unicode character classes (punctuation)
  */
-define('PREG_CLASS_PUNCTUATION', '\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-\x{ff65}\x{10100}\x{10101}\x{1039f}');
+define('PREG_CLASS_PUNCTUATION',
+'\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}'.
+'\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}'.
+'\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}'.
+'\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}'.
+'\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}'.
+'\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}'.
+'\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}'.
+'\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}'.
+'\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-'.
+'\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}'.
+'\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}'.
+'\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}'.
+'\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}'.
+'\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-'.
+'\x{ff65}');
+
+/**
+ * Matches all CJK characters that are candidates for auto-splitting
+ * (Chinese, Japanese, Korean); search_simplify() hands each run of them to
+ * search_expand_cjk(). Contains kana and BMP ideographs.
+ */
+define('PREG_CLASS_CJK', '\x{3041}-\x{30ff}\x{31f0}-\x{31ff}\x{3400}-\x{4db5}'.
+'\x{4e00}-\x{9fbb}\x{f900}-\x{fad9}');
 
 /**
  * Implementation of hook_help().
@@ -56,9 +104,9 @@ function search_help($section = 'admin/help#search') {
     case 'search#noresults':
       return t('<p><ul>
 <li>Check if your spelling is correct.</li>
-<li>Try using wildcards: <em>walk*</em> matches <em>walker</em>, <em>walking</em>, ...</li>
-<li>Use longer words (words shorter than %number letters are ignored).</li>
-</ul></p>', array('%number' => variable_get('minimum_word_size', 3)));
+<li>Remove quotes around phrases to match each word individually: <em>"blue smurf"</em> will match less than <em>blue smurf</em>.</li>
+<li>Consider loosening your query with <em>OR</em>: <em>blue smurf</em> will match less than <em>blue OR smurf</em>.</li>
+</ul></p>');
   }
 }
 
@@ -120,7 +168,7 @@ function search_menu($may_cache) {
  */
 function search_settings_form_validate($form_id, &$form) {
   // If the word length settings change, the index needs to be rebuilt.
-  if (variable_get('minimum_word_size', 3) != $form['minimum_word_size']) {
+  if (variable_get('minimum_word_size', 4) != $form['minimum_word_size']) {
     drupal_set_message(t('The index will be rebuilt.'));
     search_wipe();
   }
@@ -154,9 +202,11 @@ function search_settings() {
   // Indexing settings:
   $form['indexing_settings'] = array('#type' => 'fieldset', '#title' => t('Indexing settings'));
   $form['indexing_settings']['info'] = array('#type' => 'markup', '#value' => '<em>'. t('<p>Changing the setting below will cause the site index to be rebuilt. The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed.</p><p>The default settings should be appropriate for the majority of sites.</p>') .'</em>');
-  $form['indexing_settings']['minimum_word_size'] = array('#type' => 'textfield', '#title' => t('Minimum word length to index'), '#default_value' => variable_get('minimum_word_size', 3), '#size' => 5, '#maxlength' => 3, '#description' => t('The number of characters a word has to be to be indexed. Words shorter than this will not be searchable.'));
+  $form['indexing_settings']['minimum_word_size'] = array('#type' => 'textfield', '#title' => t('Minimum word length to index'), '#default_value' => variable_get('minimum_word_size', 4), '#size' => 5, '#maxlength' => 3, '#description' => t('The number of characters a word has to be to be indexed. A lower setting means better search result ranking, but also a larger database. Each search query must contain at least one keyword that is this size (or longer).'));
   $form['indexing_settings']['remove_short'] = array('#type' => 'textfield', '#title' => t('Minimum word length to search for'), '#default_value' => variable_get('remove_short', 3), '#size' => 5, '#maxlength' => 3, '#description' => t('The number of characters a word has to be to be searched for, including wildcard characters.'));
 
+  // Per module settings
+  $form = array_merge($form, module_invoke_all('search', 'admin'));
   return $form;
 }
 
@@ -174,6 +224,7 @@ function search_wipe($sid = NULL, $type = NULL) {
     module_invoke_all('search', 'reset');
   }
   else {
+    db_query("DELETE FROM {search_dataset} WHERE sid = %d AND type = '%s'", $sid, $type);
     db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'", $sid, $type);
     db_query("DELETE FROM {search_index} WHERE fromsid = %d AND fromtype = '%s'", $sid, $type);
   }
@@ -205,12 +256,15 @@ function search_cron() {
   foreach (module_list() as $module) {
     module_invoke($module, 'update_index');
   }
-  // Update word counts for new/changed words
+  // Update word IDF (Inverse Document Frequency) counts for new/changed words
   foreach (search_dirty() as $word => $dummy) {
+    // Get total count
     $total = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $word));
-    db_query("UPDATE {search_total} SET count = %d WHERE word = '%s'", $total, $word);
+    // Apply Zipf's law to equalize the probability distribution
+    $total = log10(1 + 1/(max(1, $total)));
+    db_query("UPDATE {search_total} SET count = %f WHERE word = '%s'", $total, $word);
     if (!db_affected_rows()) {
-      db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %d)", $word, $total);
+      db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %f)", $word, $total);
     }
   }
   // Find words that were deleted from search_index, but are still in
@@ -223,22 +277,21 @@ function search_cron() {
 }
 
 /**
- * Splits a string into component words according to indexing rules.
+ * Simplifies a string according to indexing rules.
  */
-function search_keywords_split($text) {
-  static $last = null;
-  static $lastsplit = null;
-
-  if ($last == $text) {
-    return $lastsplit;
-  }
-
+function search_simplify($text) {
   // Decode entities to UTF-8
   $text = decode_entities($text);
 
+  // Lowercase
+  $text = drupal_strtolower($text);
+
   // Call an external processor for word handling.
   search_preprocess($text);
 
+  // Baseline CJK handling
+  $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text);
+
   // To improve searching for numerical data such as dates, IP addresses
   // or version numbers, we consider a group of numerical characters
   // separated only by punctuation characters to be one piece.
@@ -255,9 +308,44 @@ function search_keywords_split($text) {
   // marks, spacers, etc, to be a word boundary.
   $text = preg_replace('/['. PREG_CLASS_SEARCH_EXCLUDE . ']+/u', ' ', $text);
 
+  return $text;
+}
+
+/**
+ * Basic CJK tokenizer: preg_replace_callback() callback that splits a run of
+ * characters ($matches[0]) into overlapping pairs, e.g. "ABC" gives " AB BC ".
+ */
+function search_expand_cjk($matches) {
+  $tokens = ' ';
+  // Split off the first character (byte-wise substr() keeps the UTF-8 rest).
+  $last = drupal_substr($matches[0], 0, 1);
+  $str = substr($matches[0], strlen($last));
+  // One pair per remaining character; a single-character run yields only ' '.
+  $l = drupal_strlen($str);
+  for ($i = 0; $i < $l; ++$i) {
+    // Grab the next character and consume exactly its own bytes, not $last's.
+    $current = drupal_substr($str, 0, 1);
+    $str = substr($str, strlen($current));
+    $tokens .= $last . $current .' ';
+    $last = $current;
+  }
+  return $tokens;
+}
+
+/**
+ * Splits a string into tokens for indexing.
+ */
+function search_index_split($text) {
+  static $last = null;
+  static $lastsplit = null;
+
+  if ($last == $text) {
+    return $lastsplit;
+  }
   // Process words
+  $text = search_simplify($text);
   $words = explode(' ', $text);
-  array_walk($words, '_search_keywords_truncate');
+  array_walk($words, '_search_index_truncate');
 
   // Save last keyword result
   $last = $text;
@@ -267,27 +355,12 @@ function search_keywords_split($text) {
 }
 
 /**
- * Helper function for array_walk in search_keywords_split.
+ * Helper function for array_walk in search_index_split.
  */
-function _search_keywords_truncate(&$text) {
+function _search_index_truncate(&$text) {
   $text = truncate_utf8($text, 50);
 }
 
-/**
- * Loosens up a set of search keywords by adding wildcards, if possible.
- *
- * @param $text
- *   The keywords as entered by the user.
- * @return
- *   If more wildcards can be added, the adjusted keywords are returned.
- *   If the query is already as loose as possible, NULL is returned.
- */
-function search_keywords_variation($text) {
-  $text = trim($text);
-  $new = preg_replace('/\*+/', '*', '*'. implode('* *', explode(' ', trim($text))) .'*');
-  return ($new != $text) ? $new : NULL;
-}
-
 /**
  * Invokes hook_search_preprocess() in modules.
  */
@@ -297,7 +370,6 @@ function search_preprocess(&$text) {
   }
 }
 
-
 /**
  * Update the full-text search index for a particular item.
  *
@@ -313,23 +385,25 @@ function search_preprocess(&$text) {
  * @ingroup search
  */
 function search_index($sid, $type, $text) {
-  $minimum_word_size = variable_get('minimum_word_size', 3);
+  $minimum_word_size = variable_get('minimum_word_size', 4);
 
+  // Link matching
   global $base_url;
-  $node_regexp = '!href=[\'"]?(?:'. preg_quote($base_url) .'/)?(?:\?q=)?([^\'">]+)[\'">]!i';
+  $node_regexp = '@href=[\'"]?(?:'. preg_quote($base_url, '@') .'/)?(?:\?q=)?/?((?![a-z]+:)[^\'">]+)[\'">]@i';
 
   // Multipliers for scores of words inside certain HTML tags.
   // Note: 'a' must be included for link ranking to work.
-  $tags = array('h1' => 21,
+  $tags = array('h1' => 25,
                 'h2' => 18,
                 'h3' => 15,
                 'h4' => 12,
                 'h5' => 9,
                 'h6' => 6,
-                'u' => 5,
-                'b' => 5,
-                'strong' => 5,
-                'em' => 5,
+                'u' => 3,
+                'b' => 3,
+                'i' => 3,
+                'strong' => 3,
+                'em' => 3,
                 'a' => 10);
 
   // Strip off all ignored tags to speed up processing, but insert space before/after
@@ -345,24 +419,46 @@ function search_index($sid, $type, $text) {
   $tag = false; // Odd/even counter. Tag or no tag.
   $link = false; // State variable for link analyser
   $score = 1; // Starting score per word
+  $accum = ' '; // Accumulator for cleaned up data
+  $tagstack = array(); // Stack with open tags
+  $tagwords = 0; // Counter for consecutive words
+  $focus = 1; // Focus state
 
-  $results = array(0 => array());
+  $results = array(0 => array()); // Accumulator for words for index
 
   foreach ($split as $value) {
     if ($tag) {
       // Increase or decrease score per word based on tag
       list($tagname) = explode(' ', $value, 2);
       $tagname = drupal_strtolower($tagname);
+      // Closing or opening tag?
       if ($tagname{0} == '/') {
-        $score -= $tags[substr($tagname, 1)];
-        if ($score < 1) { // possible due to bad HTML
+        $tagname = substr($tagname, 1);
+        // If we encounter unexpected tags, reset score to avoid incorrect boosting.
+        if (!count($tagstack) || $tagstack[0] != $tagname) {
+          $tagstack = array();
           $score = 1;
         }
-        if ($tagname == '/a') {
+        else {
+          // Remove from tag stack and decrement score
+          $score = max(1, $score - $tags[array_shift($tagstack)]);
+        }
+        if ($tagname == 'a') {
           $link = false;
         }
       }
       else {
+        if ($tagstack[0] == $tagname) {
+          // None of the tags we look for make sense when nested identically.
+          // If they are, it's probably broken HTML.
+          $tagstack = array();
+          $score = 1;          
+        }
+        else {
+          // Add to open tag stack and increment score
+          array_unshift($tagstack, $tagname);
+          $score += $tags[$tagname];
+        }
         if ($tagname == 'a') {
           // Check if link points to a node on this site
           if (preg_match($node_regexp, $value, $match)) {
@@ -370,32 +466,60 @@ function search_index($sid, $type, $text) {
             if (preg_match('!(?:node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
               $linknid = $match[1];
               if ($linknid > 0) {
-                $link = true;
+                // Note: ignore links to uncachable nodes to avoid redirect bugs.
+                $node = db_fetch_object(db_query('SELECT n.title, n.nid, n.vid, r.format FROM {node} n INNER JOIN {node_revisions} r ON n.vid = r.vid WHERE n.nid = %d', $linknid));
+                if (filter_format_allowcache($node->format)) {
+                  $link = true;
+                  $linktitle = $node->title;
+                }
               }
             }
           }
         }
-        $score += $tags[$tagname];
       }
+      // A tag change occurred, reset counter.
+      $tagwords = 0;
     }
     else {
       // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
       if ($value != '') {
-        $words = search_keywords_split($value);
+        if ($link) {
+          // Check to see if the node link text is its URL. If so, we use the target node title instead.
+          if (preg_match('!^https?://!i', $value)) {
+            $value = $linktitle;
+          }
+        }
+        $words = search_index_split($value);
         foreach ($words as $word) {
+          // Add word to accumulator
+          $accum .= $word .' ';
+          $num = is_numeric($word);
           // Check wordlength
-          if (drupal_strlen($word) >= $minimum_word_size) {
-            $word = drupal_strtolower($word);
+          if ($num || drupal_strlen($word) >= $minimum_word_size) {
+            // Normalize numbers
+            if ($num) {
+              $word = (int)ltrim($word, '-0');
+            }
+
             if ($link) {
               if (!isset($results[$linknid])) {
                 $results[$linknid] = array();
               }
-              $results[$linknid][$word] += $score;
+              $results[$linknid][$word] += $score * $focus;
             }
             else {
-              $results[0][$word] += $score;
+              $results[0][$word] += $score * $focus;
+              // Focus is a decaying value in terms of the amount of unique words up to this point.
+              // From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words.
+              $focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015));
             }
           }
+          $tagwords++;
+          // Too many words inside a single tag probably mean a tag was accidentally left open.
+          if (count($tagstack) && $tagwords >= 15) {
+            $tagstack = array();
+            $score = 1;
+          }
         }
       }
     }
@@ -404,6 +528,9 @@ function search_index($sid, $type, $text) {
 
   search_wipe($sid, $type);
 
+  // Insert cleaned up data into dataset
+  db_query("INSERT INTO {search_dataset} (sid, type, data) VALUES (%d, '%s', '%s')", $sid, $type, $accum);
+
   // Insert results into search index
   foreach ($results[0] as $word => $score) {
     db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %d)", $word, $sid, $type, $score);
@@ -420,21 +547,173 @@ function search_index($sid, $type, $text) {
   }
 }
 
+/**
+ * Extract a module-specific search option from a search query. e.g. 'type:book'
+ */
+function search_query_extract($keys, $option) {
+  // Look for "option:value" delimited by spaces or the ends of the string.
+  $hit = preg_match('/(^| )'. $option .':([^ ]*)( |$)/i', $keys, $found);
+  return $hit ? $found[2] : NULL;
+}
+
+/**
+ * Return a query with the given module-specific search option inserted in.
+ * e.g. 'type:book'.
+ */
+function search_query_insert($keys, $option, $value = '') {
+  // Remove any previous "option:...". Compare against NULL rather than testing
+  // truthiness, so that an existing empty or "0" value is replaced as well.
+  if (!is_null(search_query_extract($keys, $option))) {
+    $keys = trim(preg_replace('/(^| )'. $option .':[^ ]*/i', '', $keys));
+  }
+  // Append the new value; an empty $value just leaves the option removed.
+  return ($value != '') ? $keys .' '. $option .':'. $value : $keys;
+}
+
+/**
+ * Parse a search query into SQL conditions on d.data (dataset bodies) and
+ * i.word (index words). Returns NULL when the query has no tokens, otherwise
+ * array($query, $arguments, $query2, $arguments2, $matches).
+ */
+function search_parse_query($text) {
+  $keys = array('positive' => array(), 'negative' => array());
+
+  // Tokenize query string
+  preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' '. $text, $matches, PREG_SET_ORDER);
+
+  if (count($matches) < 1) {
+    return NULL;
+  }
+
+  // Classify tokens
+  $or = false;
+  foreach ($matches as $match) {
+    $is_or = ($match[2] == 'OR'); // Raw token: search_simplify() lowercases.
+    // Strip off quotes
+    if ($match[2]{0} == '"') {
+      $match[2] = substr($match[2], 1, -1);
+    }
+    // Simplify keyword according to indexing rules
+    $match[2] = search_simplify($match[2]);
+    // Negative matches
+    if ($match[1] == '-') {
+      $keys['negative'][] = $match[2];
+    }
+    // OR operator: store an array of all OR'd keywords instead of one keyword.
+    elseif ($is_or && count($keys['positive'])) {
+      $last = array_pop($keys['positive']); // Reuse an open group: "a OR b OR c".
+      $keys['positive'][] = is_array($last) ? $last : array($last);
+      $or = true;
+      continue;
+    }
+    // Plain keyword
+    else {
+      if ($or) {
+        $keys['positive'][count($keys['positive']) - 1][] = $match[2];
+      }
+      else {
+        $keys['positive'][] = $match[2];
+      }
+    }
+    $or = false;
+  }
+
+  // Convert keywords into SQL statements.
+  $scorewords = array();
+  $query = array();
+  $query2 = array();
+  $arguments = array();
+  $arguments2 = array();
+  $matches = 0; // Minimal number of words per item that must match in the index.
+  // Positive matches
+  foreach ($keys['positive'] as $key) {
+    // Group of ORed terms
+    if (is_array($key) && count($key)) {
+      $queryor = array();
+      foreach ($key as $or) {
+        $q = _search_parse_query($or, $scorewords);
+        if ($q) {
+          $queryor[] = $q;
+          $arguments[] = $or;
+        }
+      }
+      if (count($queryor)) {
+        $query[] = '('. implode(' OR ', $queryor) .')';
+      }
+    }
+    // Single ANDed term
+    else {
+      $q = _search_parse_query($key, $scorewords);
+      if ($q) {
+        $query[] = $q;
+        $arguments[] = $key;
+      }
+    }
+    $matches++;
+  }
+  foreach ($keys['negative'] as $key) {
+    $q = _search_parse_query($key, $scorewords, true);
+    if ($q) {
+      $query[] = $q;
+      $arguments[] = $key;
+    }
+  }
+  // Word-index conditions are kept separate: the counting query needs none.
+  foreach ($scorewords as $word) {
+    $query2[] = "i.word = '%s'";
+    $arguments2[] = $word;
+  }
+  $query = implode(' AND ', $query);
+  $query2 = implode(' OR ', $query2);
+  return array($query, $arguments, $query2, $arguments2, $matches);
+}
+
+/**
+ * Helper function for search_parse_query(): gathers score words, returns SQL.
+ */
+function _search_parse_query(&$word, &$scores, $not = false) {
+  // Determine the scorewords of this word/phrase (skipped for negative terms)
+  if (!$not) {
+    foreach (explode(' ', $word) as $s) {
+      $num = is_numeric($s);
+      if ($num || drupal_strlen($s) >= variable_get('minimum_word_size', 4)) {
+        // Normalize the numeric piece itself ($s), not the whole phrase.
+        $scores[] = $num ? ((int)ltrim($s, '-0')) : $s;
+      }
+    }
+  }
+  // Return the LIKE condition; its %s is filled from the caller's arguments.
+  return "d.data ". ($not ? 'NOT ' : '') ."LIKE '%% %s %%'";
+}
+
 /**
  * Do a query on the full-text search index for a word or words.
  *
  * This function is normally only called by each module that support the
  * indexed search (and thus, implements hook_update_index()).
  *
- * The final query is an SQL select on the search_index table. As a guide for
- * writing the optional extra SQL fragments (see below), use this query:
+ * Two queries are performed which can be extended by the caller.
+ *
+ * The first query selects a set of possible matches based on the search index
+ * and any extra given restrictions. This is the classic "OR" search.
  *
- * SELECT i.type, i.sid, i.word, SUM(i.score/t.count) AS score
+ * SELECT i.type, i.sid, SUM(i.score*t.count) AS relevance
  * FROM {search_index} i
- * $join INNER JOIN {search_total} t ON i.word = t.word
- * WHERE $where AND (i.word = '...' OR ...)
+ * INNER JOIN {search_total} t ON i.word = t.word
+ * $join1
+ * WHERE $where1 AND (...)
  * GROUP BY i.type, i.sid
- * ORDER BY score DESC";
+ *
+ * The second query further refines this set by verifying advanced text
+ * conditions (such as AND, negative or phrase matches), and orders the results
+ * on the column or expression 'score':
+ *
+ * SELECT i.type, i.sid, $select2
+ * FROM temp_search_sids i
+ * INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type
+ * $join2
+ * WHERE (...)
+ * ORDER BY score DESC
  *
  * @param $keywords
  *   A search string as entered by the user.
@@ -442,85 +721,69 @@ function search_index($sid, $type, $text) {
  * @param $type
  *   A string identifying the calling module.
  *
- * @param $join
- *   (optional) A string to be inserted into the JOIN part of the SQL query.
+ * @param $join1
+ *   (optional) Inserted into the JOIN part of the first SQL query.
  *   For example "INNER JOIN {node} n ON n.nid = i.sid".
  *
- * @param $where
- *   (optional) A string to be inserted into the WHERE part of the SQL query.
- *   For example "(n.status > 0)".
+ * @param $where1
+ *   (optional) Inserted into the WHERE part of the first SQL query.
+ *   For example "(n.status > %d)".
+ *
+ * @param $arguments1
+ *   (optional) Extra SQL arguments belonging to the first query.
+ *
+ * @param $select2
+ *   (optional) Inserted into the SELECT part of the second query. Must contain
+ *   a column selected as 'score'.
+ *   defaults to 'i.relevance AS score'
  *
- * @param $variation
- *   Used internally. Must not be specified.
+ * @param $join2
+ *   (optional) Inserted into the JOIN part of the second SQL query.
+ *   For example "INNER JOIN {node_comment_statistics} n ON n.nid = i.sid"
+ *
+ * @param $arguments2
+ *   (optional) Extra SQL arguments belonging to the second query.
  *
  * @return
  *   An array of SIDs for the search results.
  *
  * @ingroup search
  */
-function do_search($keywords, $type, $join = '', $where = '1', $variation = true) {
-  // Note, we replace the wildcards with U+FFFD (Replacement character) to pass
-  // through the keyword extractor. Multiple wildcards are collapsed into one.
-  $keys = preg_replace('!\*+!', '�', $keywords);
-
-  // Split into words
-  $keys = search_keywords_split($keys);
+function do_search($keywords, $type, $join1 = '', $where1 = '1', $arguments1 = array(), $select2 = 'i.relevance AS score', $join2 = '', $arguments2 = array()) {
+  $query = search_parse_query($keywords);
 
-  $words = array();
-  $arguments = array();
-  $refused = array();
-  // Build WHERE clause
-  foreach ($keys as $word) {
-    if (drupal_strlen($word) < variable_get('remove_short', 3)) {
-      if ($word != '') {
-        $refused[] = str_replace('�', '*', $word);
-      }
-      continue;
-    }
-    if (strpos($word, '�') !== false) {
-      $words[] = "i.word LIKE '%s'";
-      $arguments[] = str_replace('�', '%', drupal_strtolower($word));
-    }
-    else {
-      $words[] = "i.word = '%s'";
-      $arguments[] = drupal_strtolower($word);
-    }
-  }
-  // Tell the user which words were excluded
-  if (count($refused) && $variation) {
-    $message = format_plural(count($refused),
-                             'The word %words was not included because it is too short.',
-                             'The words %words were not included because they were too short.');
-    drupal_set_message(strtr($message, array('%words' => theme('placeholder', implode(', ', $refused)))));
+  if ($query === NULL || $query[0] == '' || $query[2] == '') {
+    return array();
   }
 
-  if (count($words) == 0) {
+  // First pass: select all possible matching sids, doing a simple index-based OR matching on the keywords.
+  // 'matches' is used to reject those items that cannot possibly match the query.
+  $conditions = $where1 .' AND ('. $query[2] .") AND i.type = '%s'";
+  $arguments = array_merge($arguments1, $query[3], array($type, $query[4]));
+  $result = db_query_temporary("SELECT i.type, i.sid, SUM(i.score * t.count) AS relevance, COUNT(*) AS matches FROM {search_index} i INNER JOIN {search_total} t ON i.word = t.word $join1 WHERE $conditions GROUP BY i.type, i.sid HAVING matches >= %d", $arguments, 'temp_search_sids');
+
+  // Calculate maximum relevance, to normalize it
+  $normalize = db_result(db_query('SELECT MAX(relevance) FROM temp_search_sids'));
+  if (!$normalize) {
     return array();
   }
-  $conditions = $where .' AND ('. implode(' OR ', $words) .')';
+  $select2 = str_replace('i.relevance', '('. (1.0 / $normalize) .' * i.relevance)', $select2);
 
-  // Get result count (for pager)
-  $count = db_num_rows(db_query("SELECT DISTINCT i.sid, i.type FROM {search_index} i $join WHERE $conditions", $arguments));
-  if ($count == 0) {
-    // Try out a looser search query if nothing was found.
-    if ($variation && $loose = search_keywords_variation($keywords)) {
-      return do_search($loose, $type, $join, $where, false);
-    }
-    else {
-      return array();
-    }
+  // Second pass: only keep items that match the complicated keywords conditions (phrase search, negative keywords, ...)
+  $conditions = '('. $query[0] .')';
+  $arguments = array_merge($arguments2, $query[1]);
+  $result = db_query_temporary("SELECT i.type, i.sid, $select2 FROM temp_search_sids i INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type $join2 WHERE $conditions ORDER BY score DESC", $arguments, 'temp_search_results');
+  if (($count = db_result(db_query('SELECT COUNT(*) FROM temp_search_results'))) == 0) {
+    return array();
   }
   $count_query = "SELECT $count";
 
-  // Do pager query
-  $query = "SELECT i.type, i.sid, SUM(i.score/t.count) AS score FROM {search_index} i $join INNER JOIN {search_total} t ON i.word = t.word WHERE $conditions GROUP BY i.type, i.sid ORDER BY score DESC";
-  $result = pager_query($query, 15, 0, $count_query, $arguments);
-
+  // Do actual search query
+  $result = pager_query("SELECT * FROM temp_search_results", 10, 0, $count_query, $arguments);
   $results = array();
   while ($item = db_fetch_object($result)) {
     $results[] = $item->sid;
   }
-
   return $results;
 }
 
@@ -543,11 +806,12 @@ function search_view() {
   // Search form submits with POST but redirects to GET. This way we can keep
   // the search query URL clean as a whistle:
   // search/type/keyword+keyword
-  if ($_POST['edit']['keys']) {
+  if (isset($_POST['op'])) {
     if ($type == '') {
       $type = 'node';
     }
-    drupal_goto('search/'. urlencode($type) .'/'. urlencode($_POST['edit']['keys']));
+    $keys = module_invoke($type, 'search', 'post', $_POST['edit']['keys']);
+    drupal_goto('search/'. urlencode($type) .'/'. urlencode(is_null($keys) ? $_POST['edit']['keys'] : $keys));
   }
   else if ($type == '') {
     // Note: search/node can not be a default tab because it would take on the
@@ -647,21 +911,17 @@ function search_form($action = '', $keys = '', $type = null, $prompt = null) {
     $prompt = t('Enter your keywords');
   }
 
+  $form = array();
   $form['#action'] = $action;
-  $form['prompt'] = array('#type' => 'item', '#title' => $prompt);
-  $form['keys'] = array('#type' => 'textfield', '#title' => '', '#default_value' => $keys, '#size' => $prompt ? 40 : 30, '#maxlength' => 255);
-  $form['submit'] = array('#type' => 'submit', '#value' => t('Search'));
   $form['#attributes'] = array('class' => 'search-form');
+  $form['basic'] = array('#type' => 'item', '#title' => $prompt);
+  $form['basic']['inline'] = array('#type' => 'markup', '#prefix' => '<div class="container-inline">', '#suffix' => '</div>');
+  $form['basic']['inline']['keys'] = array('#type' => 'textfield', '#title' => '', '#default_value' => $keys, '#size' => $prompt ? 40 : 30, '#maxlength' => 255);
+  $form['basic']['inline']['submit'] = array('#type' => 'submit', '#value' => t('Search'));
 
-  return drupal_get_form('search_form', $form);
-}
+  $form = array_merge($form, module_invoke($type, 'search', 'form', $keys));
 
-function theme_search_form($form) {
-  $output  = form_render($form['prompt']);
-  $output .= '<div class="container-inline">';
-  $output .= form_render($form);
-  $output .= '</div>';
-  return $output;
+  return drupal_get_form('search_form', $form);
 }
 
 /**
@@ -692,8 +952,7 @@ function search_data($keys = NULL, $type = 'node') {
  * Used for formatting search results.
  *
  * @param $keys
- *   A string containing keywords. They are split into words using the same
- *   rules as search indexing.
+ *   A string containing a search query.
  *
  * @param $text
  *   The text to extract fragments from.
@@ -702,7 +961,11 @@ function search_data($keys = NULL, $type = 'node') {
  *   A string containing HTML for the excerpt.
  */
 function search_excerpt($keys, $text) {
-  $keys = search_keywords_split($keys);
+  // Extract positive keywords and phrases
+  preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' '. $keys, $matches);
+  $keys = array_merge($matches[2], $matches[3]);
+
+  // Prepare text
   $text = strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text));
   array_walk($keys, '_search_excerpt_replace');
   $workkeys = $keys;
@@ -718,6 +981,7 @@ function search_excerpt($keys, $text) {
     foreach ($workkeys as $k => $key) {
       if (strlen($key) == 0) {
         unset($workkeys[$k]);
+        unset($keys[$k]);
         continue;
       }
       if ($length >= 256) {
diff --git a/modules/search/search.module b/modules/search/search.module
index adc020eb57a808c68844e2367bd830aa78942029..e26f7045e2fba386a4718c57e3c2e5cf50b4ad08 100644
--- a/modules/search/search.module
+++ b/modules/search/search.module
@@ -15,32 +15,80 @@
  * Lu     Letter, Uppercase
  * Ll     Letter, Lowercase
  * Lt     Letter, Titlecase
- * Lm     Letter, Modifier
  * Lo     Letter, Other
- * Mn     Mark, Nonspacing
- * Mc     Mark, Spacing Combining
  * Nd     Number, Decimal Digit
- * Nl     Number, Letter
  * No     Number, Other
- * Sm     Symbol, Math
- * Sc     Symbol, Currency
- * Sk     Symbol, Modifier
- * So     Symbol, Other
- *
- * All character classes not in the list above (enclosing marks, punctuation, control codes and spacers):
- * 'Me', 'Pc', 'Pd', 'Ps', 'Pe', 'Pi', 'Pf', 'Po', 'Zs', 'Zl', 'Zp', 'Cc', 'Cf', 'Cs', 'Co'
  */
-define('PREG_CLASS_SEARCH_EXCLUDE', '\x{0}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}\x{5f}\x{7b}\x{7d}\x{7f}-\x{a1}\x{ab}\x{ad}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{488}\x{489}\x{55a}-\x{55f}\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{600}-\x{603}\x{60c}\x{60d}\x{61b}\x{61f}\x{66a}-\x{66d}\x{6d4}\x{6dd}\x{6de}\x{700}-\x{70d}\x{70f}\x{964}\x{965}\x{970}\x{df4}\x{e4f}\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}\x{1361}-\x{1368}\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}\x{1736}\x{17b4}\x{17b5}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{180e}\x{1944}\x{1945}\x{2000}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}\x{205f}-\x{2063}\x{206a}-\x{206f}\x{207d}\x{207e}\x{208d}\x{208e}\x{20dd}-\x{20e0}\x{20e2}-\x{20e4}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}\x{3000}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}\x{30fb}\x{d800}\x{db7f}\x{db80}\x{dbff}\x{dc00}\x{dfff}\x{e000}\x{f8ff}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}\x{fe6a}\x{fe6b}\x{feff}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-\x{ff65}\x{fff9}-\x{fffb}\x{10100}\x{10101}\x{1039f}\x{1d173}-\x{1d17a}\x{e0001}\x{e0020}-\x{e007f}\x{f0000}\x{ffffd}\x{100000}');
+define('PREG_CLASS_SEARCH_EXCLUDE',
+'\x{0}-\x{2f}\x{3a}-\x{40}\x{5b}-\x{60}\x{7b}-\x{bf}\x{d7}\x{f7}\x{2b0}-'.
+'\x{385}\x{387}\x{3f6}\x{482}-\x{489}\x{559}-\x{55f}\x{589}-\x{5c7}\x{5f3}-'.
+'\x{61f}\x{640}\x{64b}-\x{65e}\x{66a}-\x{66d}\x{670}\x{6d4}\x{6d6}-\x{6ed}'.
+'\x{6fd}\x{6fe}\x{700}-\x{70f}\x{711}\x{730}-\x{74a}\x{7a6}-\x{7b0}\x{901}-'.
+'\x{903}\x{93c}\x{93e}-\x{94d}\x{951}-\x{954}\x{962}-\x{965}\x{970}\x{981}-'.
+'\x{983}\x{9bc}\x{9be}-\x{9cd}\x{9d7}\x{9e2}\x{9e3}\x{9f2}-\x{a03}\x{a3c}-'.
+'\x{a4d}\x{a70}\x{a71}\x{a81}-\x{a83}\x{abc}\x{abe}-\x{acd}\x{ae2}\x{ae3}'.
+'\x{af1}-\x{b03}\x{b3c}\x{b3e}-\x{b57}\x{b70}\x{b82}\x{bbe}-\x{bd7}\x{bf0}-'.
+'\x{c03}\x{c3e}-\x{c56}\x{c82}\x{c83}\x{cbc}\x{cbe}-\x{cd6}\x{d02}\x{d03}'.
+'\x{d3e}-\x{d57}\x{d82}\x{d83}\x{dca}-\x{df4}\x{e31}\x{e34}-\x{e3f}\x{e46}-'.
+'\x{e4f}\x{e5a}\x{e5b}\x{eb1}\x{eb4}-\x{ebc}\x{ec6}-\x{ecd}\x{f01}-\x{f1f}'.
+'\x{f2a}-\x{f3f}\x{f71}-\x{f87}\x{f90}-\x{fd1}\x{102c}-\x{1039}\x{104a}-'.
+'\x{104f}\x{1056}-\x{1059}\x{10fb}\x{10fc}\x{135f}-\x{137c}\x{1390}-\x{1399}'.
+'\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16f0}\x{1712}-\x{1714}'.
+'\x{1732}-\x{1736}\x{1752}\x{1753}\x{1772}\x{1773}\x{17b4}-\x{17db}\x{17dd}'.
+'\x{17f0}-\x{180e}\x{1843}\x{18a9}\x{1920}-\x{1945}\x{19b0}-\x{19c0}\x{19c8}'.
+'\x{19c9}\x{19de}-\x{19ff}\x{1a17}-\x{1a1f}\x{1d2c}-\x{1d61}\x{1d78}\x{1d9b}-'.
+'\x{1dc3}\x{1fbd}\x{1fbf}-\x{1fc1}\x{1fcd}-\x{1fcf}\x{1fdd}-\x{1fdf}\x{1fed}-'.
+'\x{1fef}\x{1ffd}-\x{2070}\x{2074}-\x{207e}\x{2080}-\x{2101}\x{2103}-\x{2106}'.
+'\x{2108}\x{2109}\x{2114}\x{2116}-\x{2118}\x{211e}-\x{2123}\x{2125}\x{2127}'.
+'\x{2129}\x{212e}\x{2132}\x{213a}\x{213b}\x{2140}-\x{2144}\x{214a}-\x{2b13}'.
+'\x{2ce5}-\x{2cff}\x{2d6f}\x{2e00}-\x{3005}\x{3007}-\x{303b}\x{303d}-\x{303f}'.
+'\x{3099}-\x{309e}\x{30a0}\x{30fb}-\x{30fe}\x{3190}-\x{319f}\x{31c0}-\x{31cf}'.
+'\x{3200}-\x{33ff}\x{4dc0}-\x{4dff}\x{a015}\x{a490}-\x{a716}\x{a802}\x{a806}'.
+'\x{a80b}\x{a823}-\x{a82b}\x{d800}-\x{f8ff}\x{fb1e}\x{fb29}\x{fd3e}\x{fd3f}'.
+'\x{fdfc}-\x{fe6b}\x{feff}-\x{ff0f}\x{ff1a}-\x{ff20}\x{ff3b}-\x{ff40}\x{ff5b}-'.
+'\x{ff65}\x{ff70}\x{ff9e}\x{ff9f}\x{ffe0}-\x{fffd}');
 
 /**
  * Matches all 'N' Unicode character classes (numbers)
  */
-define('PREG_CLASS_NUMBERS', '\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}\x{10107}-\x{10133}\x{10320}-\x{10323}\x{1034a}\x{104a0}-\x{104a9}\x{1d7ce}-\x{1d7ff}');
+define('PREG_CLASS_NUMBERS',
+'\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}'.
+'\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}'.
+'\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}'.
+'\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-'.
+'\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}'.
+'\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}'.
+'\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}'.
+'\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-'.
+'\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}');
 
 /**
  * Matches all 'P' Unicode character classes (punctuation)
  */
-define('PREG_CLASS_PUNCTUATION', '\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-\x{ff65}\x{10100}\x{10101}\x{1039f}');
+define('PREG_CLASS_PUNCTUATION',
+'\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}'.
+'\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}'.
+'\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}'.
+'\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}'.
+'\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}'.
+'\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}'.
+'\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}'.
+'\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}'.
+'\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-'.
+'\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}'.
+'\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}'.
+'\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}'.
+'\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}'.
+'\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-'.
+'\x{ff65}');
+
+/**
+ * Matches all CJK characters that are candidates for auto-splitting
+ * (Chinese, Japanese, Korean).
+ * Contains kana and BMP ideographs.
+ */
+define('PREG_CLASS_CJK', '\x{3041}-\x{30ff}\x{31f0}-\x{31ff}\x{3400}-\x{4db5}'.
+'\x{4e00}-\x{9fbb}\x{f900}-\x{fad9}');
 
 /**
  * Implementation of hook_help().
@@ -56,9 +104,9 @@ function search_help($section = 'admin/help#search') {
     case 'search#noresults':
       return t('<p><ul>
 <li>Check if your spelling is correct.</li>
-<li>Try using wildcards: <em>walk*</em> matches <em>walker</em>, <em>walking</em>, ...</li>
-<li>Use longer words (words shorter than %number letters are ignored).</li>
-</ul></p>', array('%number' => variable_get('minimum_word_size', 3)));
+<li>Remove quotes around phrases to match each word individually: <em>"blue smurf"</em> will match less than <em>blue smurf</em>.</li>
+<li>Consider loosening your query with <em>OR</em>: <em>blue smurf</em> will match less than <em>blue OR smurf</em>.</li>
+</ul></p>');
   }
 }
 
@@ -120,7 +168,7 @@ function search_menu($may_cache) {
  */
 function search_settings_form_validate($form_id, &$form) {
   // If the word length settings change, the index needs to be rebuilt.
-  if (variable_get('minimum_word_size', 3) != $form['minimum_word_size']) {
+  if (variable_get('minimum_word_size', 4) != $form['minimum_word_size']) {
     drupal_set_message(t('The index will be rebuilt.'));
     search_wipe();
   }
@@ -154,9 +202,11 @@ function search_settings() {
   // Indexing settings:
   $form['indexing_settings'] = array('#type' => 'fieldset', '#title' => t('Indexing settings'));
   $form['indexing_settings']['info'] = array('#type' => 'markup', '#value' => '<em>'. t('<p>Changing the setting below will cause the site index to be rebuilt. The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed.</p><p>The default settings should be appropriate for the majority of sites.</p>') .'</em>');
-  $form['indexing_settings']['minimum_word_size'] = array('#type' => 'textfield', '#title' => t('Minimum word length to index'), '#default_value' => variable_get('minimum_word_size', 3), '#size' => 5, '#maxlength' => 3, '#description' => t('The number of characters a word has to be to be indexed. Words shorter than this will not be searchable.'));
+  $form['indexing_settings']['minimum_word_size'] = array('#type' => 'textfield', '#title' => t('Minimum word length to index'), '#default_value' => variable_get('minimum_word_size', 4), '#size' => 5, '#maxlength' => 3, '#description' => t('The number of characters a word has to be to be indexed. A lower setting means better search result ranking, but also a larger database. Each search query must contain at least one keyword that is this size (or longer).'));
   $form['indexing_settings']['remove_short'] = array('#type' => 'textfield', '#title' => t('Minimum word length to search for'), '#default_value' => variable_get('remove_short', 3), '#size' => 5, '#maxlength' => 3, '#description' => t('The number of characters a word has to be to be searched for, including wildcard characters.'));
 
+  // Per module settings
+  $form = array_merge($form, module_invoke_all('search', 'admin'));
   return $form;
 }
 
@@ -174,6 +224,7 @@ function search_wipe($sid = NULL, $type = NULL) {
     module_invoke_all('search', 'reset');
   }
   else {
+    db_query("DELETE FROM {search_dataset} WHERE sid = %d AND type = '%s'", $sid, $type);
     db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'", $sid, $type);
     db_query("DELETE FROM {search_index} WHERE fromsid = %d AND fromtype = '%s'", $sid, $type);
   }
@@ -205,12 +256,15 @@ function search_cron() {
   foreach (module_list() as $module) {
     module_invoke($module, 'update_index');
   }
-  // Update word counts for new/changed words
+  // Update word IDF (Inverse Document Frequency) counts for new/changed words
   foreach (search_dirty() as $word => $dummy) {
+    // Get total count
     $total = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $word));
-    db_query("UPDATE {search_total} SET count = %d WHERE word = '%s'", $total, $word);
+    // Apply Zipf's law to equalize the probability distribution
+    $total = log10(1 + 1/(max(1, $total)));
+    db_query("UPDATE {search_total} SET count = %f WHERE word = '%s'", $total, $word);
     if (!db_affected_rows()) {
-      db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %d)", $word, $total);
+      db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %f)", $word, $total);
     }
   }
   // Find words that were deleted from search_index, but are still in
@@ -223,22 +277,21 @@ function search_cron() {
 }
 
 /**
- * Splits a string into component words according to indexing rules.
+ * Simplifies a string according to indexing rules.
  */
-function search_keywords_split($text) {
-  static $last = null;
-  static $lastsplit = null;
-
-  if ($last == $text) {
-    return $lastsplit;
-  }
-
+function search_simplify($text) {
   // Decode entities to UTF-8
   $text = decode_entities($text);
 
+  // Lowercase
+  $text = drupal_strtolower($text);
+
   // Call an external processor for word handling.
   search_preprocess($text);
 
+  // Baseline CJK handling
+  $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text);
+
   // To improve searching for numerical data such as dates, IP addresses
   // or version numbers, we consider a group of numerical characters
   // separated only by punctuation characters to be one piece.
@@ -255,9 +308,44 @@ function search_keywords_split($text) {
   // marks, spacers, etc, to be a word boundary.
   $text = preg_replace('/['. PREG_CLASS_SEARCH_EXCLUDE . ']+/u', ' ', $text);
 
+  return $text;
+}
+
+/**
+ * Basic CJK tokenizer, used as the preg_replace_callback() callback in search_simplify(). Splits the matched run $matches[0]
+ * into consecutive, overlapping pairs of characters separated by spaces. A run of a single character is returned as its own token.
+ */
+function search_expand_cjk($matches) {
+  // Split off first character
+  $last = drupal_substr($matches[0], 0, 1);
+  $str = substr($matches[0], strlen($last));
+  $l = drupal_strlen($str);
+  // A lone character has no pair to form: keep it as a token rather than dropping it from the text.
+  $tokens = ($l == 0) ? ' '. $last .' ' : ' ';
+  for ($i = 0; $i < $l; ++$i) {
+    // Grab next character and strip it off by its own byte length (not that of the previous character).
+    $current = drupal_substr($str, 0, 1);
+    $str = substr($str, strlen($current));
+    $tokens .= $last . $current .' ';
+    $last = $current;
+  }
+  return $tokens;
+}
+
+/**
+ * Splits a string into tokens for indexing.
+ */
+function search_index_split($text) {
+  static $last = null;
+  static $lastsplit = null;
+
+  if ($last == $text) {
+    return $lastsplit;
+  }
   // Process words
+  $text = search_simplify($text);
   $words = explode(' ', $text);
-  array_walk($words, '_search_keywords_truncate');
+  array_walk($words, '_search_index_truncate');
 
   // Save last keyword result
   $last = $text;
@@ -267,27 +355,12 @@ function search_keywords_split($text) {
 }
 
 /**
- * Helper function for array_walk in search_keywords_split.
+ * Helper function for array_walk in search_index_split.
  */
-function _search_keywords_truncate(&$text) {
+function _search_index_truncate(&$text) {
   $text = truncate_utf8($text, 50);
 }
 
-/**
- * Loosens up a set of search keywords by adding wildcards, if possible.
- *
- * @param $text
- *   The keywords as entered by the user.
- * @return
- *   If more wildcards can be added, the adjusted keywords are returned.
- *   If the query is already as loose as possible, NULL is returned.
- */
-function search_keywords_variation($text) {
-  $text = trim($text);
-  $new = preg_replace('/\*+/', '*', '*'. implode('* *', explode(' ', trim($text))) .'*');
-  return ($new != $text) ? $new : NULL;
-}
-
 /**
  * Invokes hook_search_preprocess() in modules.
  */
@@ -297,7 +370,6 @@ function search_preprocess(&$text) {
   }
 }
 
-
 /**
  * Update the full-text search index for a particular item.
  *
@@ -313,23 +385,25 @@ function search_preprocess(&$text) {
  * @ingroup search
  */
 function search_index($sid, $type, $text) {
-  $minimum_word_size = variable_get('minimum_word_size', 3);
+  $minimum_word_size = variable_get('minimum_word_size', 4);
 
+  // Link matching
   global $base_url;
-  $node_regexp = '!href=[\'"]?(?:'. preg_quote($base_url) .'/)?(?:\?q=)?([^\'">]+)[\'">]!i';
+  $node_regexp = '@href=[\'"]?(?:'. preg_quote($base_url, '@') .'/)?(?:\?q=)?/?((?![a-z]+:)[^\'">]+)[\'">]@i';
 
   // Multipliers for scores of words inside certain HTML tags.
   // Note: 'a' must be included for link ranking to work.
-  $tags = array('h1' => 21,
+  $tags = array('h1' => 25,
                 'h2' => 18,
                 'h3' => 15,
                 'h4' => 12,
                 'h5' => 9,
                 'h6' => 6,
-                'u' => 5,
-                'b' => 5,
-                'strong' => 5,
-                'em' => 5,
+                'u' => 3,
+                'b' => 3,
+                'i' => 3,
+                'strong' => 3,
+                'em' => 3,
                 'a' => 10);
 
   // Strip off all ignored tags to speed up processing, but insert space before/after
@@ -345,24 +419,46 @@ function search_index($sid, $type, $text) {
   $tag = false; // Odd/even counter. Tag or no tag.
   $link = false; // State variable for link analyser
   $score = 1; // Starting score per word
+  $accum = ' '; // Accumulator for cleaned up data
+  $tagstack = array(); // Stack with open tags
+  $tagwords = 0; // Counter for consecutive words
+  $focus = 1; // Focus state
 
-  $results = array(0 => array());
+  $results = array(0 => array()); // Accumulator for words for index
 
   foreach ($split as $value) {
     if ($tag) {
       // Increase or decrease score per word based on tag
       list($tagname) = explode(' ', $value, 2);
       $tagname = drupal_strtolower($tagname);
+      // Closing or opening tag?
       if ($tagname{0} == '/') {
-        $score -= $tags[substr($tagname, 1)];
-        if ($score < 1) { // possible due to bad HTML
+        $tagname = substr($tagname, 1);
+        // If we encounter unexpected tags, reset score to avoid incorrect boosting.
+        if (!count($tagstack) || $tagstack[0] != $tagname) {
+          $tagstack = array();
           $score = 1;
         }
-        if ($tagname == '/a') {
+        else {
+          // Remove from tag stack and decrement score
+          $score = max(1, $score - $tags[array_shift($tagstack)]);
+        }
+        if ($tagname == 'a') {
           $link = false;
         }
       }
       else {
+        if ($tagstack[0] == $tagname) {
+          // None of the tags we look for make sense when nested identically.
+          // If they are, it's probably broken HTML.
+          $tagstack = array();
+          $score = 1;          
+        }
+        else {
+          // Add to open tag stack and increment score
+          array_unshift($tagstack, $tagname);
+          $score += $tags[$tagname];
+        }
         if ($tagname == 'a') {
           // Check if link points to a node on this site
           if (preg_match($node_regexp, $value, $match)) {
@@ -370,32 +466,60 @@ function search_index($sid, $type, $text) {
             if (preg_match('!(?:node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
               $linknid = $match[1];
               if ($linknid > 0) {
-                $link = true;
+                // Note: ignore links to uncachable nodes to avoid redirect bugs.
+                $node = db_fetch_object(db_query('SELECT n.title, n.nid, n.vid, r.format FROM {node} n INNER JOIN {node_revisions} r ON n.vid = r.vid WHERE n.nid = %d', $linknid));
+                if (filter_format_allowcache($node->format)) {
+                  $link = true;
+                  $linktitle = $node->title;
+                }
               }
             }
           }
         }
-        $score += $tags[$tagname];
       }
+      // A tag change occurred, reset counter.
+      $tagwords = 0;
     }
     else {
       // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
       if ($value != '') {
-        $words = search_keywords_split($value);
+        if ($link) {
+          // Check to see if the node link text is its URL. If so, we use the target node title instead.
+          if (preg_match('!^https?://!i', $value)) {
+            $value = $linktitle;
+          }
+        }
+        $words = search_index_split($value);
         foreach ($words as $word) {
+          // Add word to accumulator
+          $accum .= $word .' ';
+          $num = is_numeric($word);
           // Check wordlength
-          if (drupal_strlen($word) >= $minimum_word_size) {
-            $word = drupal_strtolower($word);
+          if ($num || drupal_strlen($word) >= $minimum_word_size) {
+            // Normalize numbers
+            if ($num) {
+              $word = (int)ltrim($word, '-0');
+            }
+
             if ($link) {
               if (!isset($results[$linknid])) {
                 $results[$linknid] = array();
               }
-              $results[$linknid][$word] += $score;
+              $results[$linknid][$word] += $score * $focus;
             }
             else {
-              $results[0][$word] += $score;
+              $results[0][$word] += $score * $focus;
+              // Focus is a decaying value in terms of the amount of unique words up to this point.
+              // From 100 words and more, it decays, to e.g. about 0.38 at 500 words and 0.22 at 1000 words.
+              $focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015));
             }
           }
+          $tagwords++;
+          // Too many words inside a single tag probably mean a tag was accidentally left open.
+          if (count($tagstack) && $tagwords >= 15) {
+            $tagstack = array();
+            $score = 1;
+          }
         }
       }
     }
@@ -404,6 +528,9 @@ function search_index($sid, $type, $text) {
 
   search_wipe($sid, $type);
 
+  // Insert cleaned up data into dataset
+  db_query("INSERT INTO {search_dataset} (sid, type, data) VALUES (%d, '%s', '%s')", $sid, $type, $accum);
+
   // Insert results into search index
   foreach ($results[0] as $word => $score) {
     db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %d)", $word, $sid, $type, $score);
@@ -420,21 +547,173 @@ function search_index($sid, $type, $text) {
   }
 }
 
+/**
+ * Extract a module-specific search option from a search query, e.g. the 'book'
+ * of 'type:book'. Returns NULL when the option does not occur in $keys.
+ */
+function search_query_extract($keys, $option) {
+  // Quote $option so it is matched literally rather than as regexp syntax.
+  return preg_match('/(^| )'. preg_quote($option, '/') .':([^ ]*)( |$)/i', $keys, $matches) ? $matches[2] : NULL;
+}
+
+/**
+ * Return a query with the given module-specific search option inserted in, e.g.
+ * 'type:book'. Any existing value, even '0', is removed; '' inserts nothing.
+ */
+function search_query_insert($keys, $option, $value = '') {
+  if (!is_null(search_query_extract($keys, $option))) {
+    $keys = trim(preg_replace('/(^| )'. preg_quote($option, '/') .':[^ ]*/i', '', $keys));
+  }
+  if ($value != '') {
+    $keys .= ' '. $option .':'. $value;
+  }
+  return $keys;
+}
+
+/**
+ * Parse a search query into SQL conditions.
+ *
+ * We build a query that matches the dataset bodies. Returns NULL when $text
+ * has no tokens, otherwise array($query, $arguments, $query2, $arguments2,
+ * $matches) as assembled at the end of this function.
+ */
+function search_parse_query($text) {
+  $keys = array('positive' => array(), 'negative' => array());
+
+  // Tokenize query string
+  preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' '. $text, $matches, PREG_SET_ORDER);
+
+  if (count($matches) < 1) {
+    return NULL;
+  }
+
+  // Classify tokens
+  $or = false;
+  foreach ($matches as $match) {
+    // Strip off quotes
+    $word = ($match[2][0] == '"') ? substr($match[2], 1, -1) : $match[2];
+    // Simplify keyword according to indexing rules
+    $word = search_simplify($word);
+    // Negative matches
+    if ($match[1] == '-') {
+      $keys['negative'][] = $word;
+    }
+    // OR operator (tested on the raw token): instead of a single keyword, we
+    // store an array of all OR'd keywords; a chained OR extends that array.
+    elseif ($match[2] == 'OR' && count($keys['positive'])) {
+      $last = array_pop($keys['positive']);
+      $keys['positive'][] = is_array($last) ? $last : array($last);
+      $or = true;
+      continue;
+    }
+    // Keyword directly after an OR: add it to the group opened by that OR
+    elseif ($or) {
+      $keys['positive'][count($keys['positive']) - 1][] = $word;
+    }
+    // Plain keyword
+    else {
+      $keys['positive'][] = $word;
+    }
+    $or = false;
+  }
+
+  // Convert keywords into SQL statements.
+  $scorewords = array();
+  $query = array();
+  $query2 = array();
+  $arguments = array();
+  $arguments2 = array();
+  $matches = 0; // Counts the minimal number of words per item that must match in the index.
+  // Positive matches
+  foreach ($keys['positive'] as $key) {
+    // Group of ORed terms
+    if (is_array($key) && count($key)) {
+      $queryor = array();
+      foreach ($key as $or) {
+        $q = _search_parse_query($or, $scorewords);
+        if ($q) {
+          $queryor[] = $q;
+          $arguments[] = $or;
+        }
+      }
+      if (count($queryor)) {
+        $query[] = '('. implode(' OR ', $queryor) .')';
+      }
+    }
+    // Single ANDed term
+    else {
+      $q = _search_parse_query($key, $scorewords);
+      if ($q) {
+        $query[] = $q;
+        $arguments[] = $key;
+      }
+    }
+    $matches++;
+  }
+  foreach ($keys['negative'] as $key) {
+    $q = _search_parse_query($key, $scorewords, true);
+    if ($q) {
+      $query[] = $q;
+      $arguments[] = $key;
+    }
+  }
+  // We separate word-index conditions because they are not needed in the
+  // counting query.
+  foreach ($scorewords as $word) {
+    $query2[] = "i.word = '%s'";
+    $arguments2[] = $word;
+  }
+  $query = implode(' AND ', $query);
+  $query2 = implode(' OR ', $query2);
+  return array($query, $arguments, $query2, $arguments2, $matches);
+}
+
+/**
+ * Helper function for search_parse_query(): adds the index words of a positive
+ * keyword or phrase to $scores and returns the SQL (NOT) LIKE snippet for it.
+ */
+function _search_parse_query(&$word, &$scores, $not = false) {
+  // Negated terms add no scorewords. Numbers are normalized one word at a time.
+  if (!$not) {
+    foreach (explode(' ', $word) as $s) {
+      $num = is_numeric($s);
+      if ($num || drupal_strlen($s) >= variable_get('minimum_word_size', 4)) {
+        $scores[] = $num ? ((int)ltrim($s, '-0')) : $s;
+      }
+    }
+  }
+  // Return matching snippet
+  return "d.data ". ($not ? 'NOT ' : '') ."LIKE '%% %s %%'";
+}
+
 /**
  * Do a query on the full-text search index for a word or words.
  *
  * This function is normally only called by each module that support the
  * indexed search (and thus, implements hook_update_index()).
  *
- * The final query is an SQL select on the search_index table. As a guide for
- * writing the optional extra SQL fragments (see below), use this query:
+ * Two queries are performed which can be extended by the caller.
+ *
+ * The first query selects a set of possible matches based on the search index
+ * and any extra given restrictions. This is the classic "OR" search.
  *
- * SELECT i.type, i.sid, i.word, SUM(i.score/t.count) AS score
+ * SELECT i.type, i.sid, SUM(i.score*t.count) AS relevance
  * FROM {search_index} i
- * $join INNER JOIN {search_total} t ON i.word = t.word
- * WHERE $where AND (i.word = '...' OR ...)
+ * INNER JOIN {search_total} t ON i.word = t.word
+ * $join1
+ * WHERE $where1 AND (...)
  * GROUP BY i.type, i.sid
- * ORDER BY score DESC";
+ *
+ * The second query further refines this set by verifying advanced text
+ * conditions (such as AND, negative or phrase matches), and orders the results
+ * on the column or expression 'score':
+ *
+ * SELECT i.type, i.sid, $select2
+ * FROM temp_search_sids i
+ * INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type
+ * $join2
+ * WHERE (...)
+ * ORDER BY score DESC
  *
  * @param $keywords
  *   A search string as entered by the user.
@@ -442,85 +721,69 @@ function search_index($sid, $type, $text) {
  * @param $type
  *   A string identifying the calling module.
  *
- * @param $join
- *   (optional) A string to be inserted into the JOIN part of the SQL query.
+ * @param $join1
+ *   (optional) Inserted into the JOIN part of the first SQL query.
  *   For example "INNER JOIN {node} n ON n.nid = i.sid".
  *
- * @param $where
- *   (optional) A string to be inserted into the WHERE part of the SQL query.
- *   For example "(n.status > 0)".
+ * @param $where1
+ *   (optional) Inserted into the WHERE part of the first SQL query.
+ *   For example "(n.status > %d)".
+ *
+ * @param $arguments1
+ *   (optional) Extra SQL arguments belonging to the first query.
+ *
+ * @param $select2
+ *   (optional) Inserted into the SELECT part of the second query. Must contain
+ *   a column selected as 'score'.
+ *   Defaults to 'i.relevance AS score'.
  *
- * @param $variation
- *   Used internally. Must not be specified.
+ * @param $join2
+ *   (optional) Inserted into the JOIN part of the second SQL query.
+ *   For example "INNER JOIN {node_comment_statistics} n ON n.nid = i.sid"
+ *
+ * @param $arguments2
+ *   (optional) Extra SQL arguments belonging to the second query parameter.
  *
  * @return
  *   An array of SIDs for the search results.
  *
  * @ingroup search
  */
-function do_search($keywords, $type, $join = '', $where = '1', $variation = true) {
-  // Note, we replace the wildcards with U+FFFD (Replacement character) to pass
-  // through the keyword extractor. Multiple wildcards are collapsed into one.
-  $keys = preg_replace('!\*+!', '�', $keywords);
-
-  // Split into words
-  $keys = search_keywords_split($keys);
+function do_search($keywords, $type, $join1 = '', $where1 = '1', $arguments1 = array(), $select2 = 'i.relevance AS score', $join2 = '', $arguments2 = array()) {
+  $query = search_parse_query($keywords);
 
-  $words = array();
-  $arguments = array();
-  $refused = array();
-  // Build WHERE clause
-  foreach ($keys as $word) {
-    if (drupal_strlen($word) < variable_get('remove_short', 3)) {
-      if ($word != '') {
-        $refused[] = str_replace('�', '*', $word);
-      }
-      continue;
-    }
-    if (strpos($word, '�') !== false) {
-      $words[] = "i.word LIKE '%s'";
-      $arguments[] = str_replace('�', '%', drupal_strtolower($word));
-    }
-    else {
-      $words[] = "i.word = '%s'";
-      $arguments[] = drupal_strtolower($word);
-    }
-  }
-  // Tell the user which words were excluded
-  if (count($refused) && $variation) {
-    $message = format_plural(count($refused),
-                             'The word %words was not included because it is too short.',
-                             'The words %words were not included because they were too short.');
-    drupal_set_message(strtr($message, array('%words' => theme('placeholder', implode(', ', $refused)))));
+  if ($query === NULL || $query[0] == '' || $query[2] == '') {
+    return array();
   }
 
-  if (count($words) == 0) {
+  // First pass: select all possible matching sids, doing a simple index-based OR matching on the keywords.
+  // 'matches' counts the index words found per item; items with fewer than $query[4] of them cannot possibly match the query and are rejected.
+  $conditions = $where1 .' AND ('. $query[2] .") AND i.type = '%s'";
+  $arguments = array_merge($arguments1, $query[3], array($type, $query[4]));
+  $result = db_query_temporary("SELECT i.type, i.sid, SUM(i.score * t.count) AS relevance, COUNT(*) AS matches FROM {search_index} i INNER JOIN {search_total} t ON i.word = t.word $join1 WHERE $conditions GROUP BY i.type, i.sid HAVING matches >= %d", $arguments, 'temp_search_sids');
+
+  // Calculate maximum relevance, to normalize it. A missing or zero maximum means there is nothing to return.
+  $normalize = db_result(db_query('SELECT MAX(relevance) FROM temp_search_sids'));
+  if (!$normalize) {
     return array();
   }
-  $conditions = $where .' AND ('. implode(' OR ', $words) .')';
+  $select2 = str_replace('i.relevance', '('. (1.0 / $normalize) .' * i.relevance)', $select2);
 
-  // Get result count (for pager)
-  $count = db_num_rows(db_query("SELECT DISTINCT i.sid, i.type FROM {search_index} i $join WHERE $conditions", $arguments));
-  if ($count == 0) {
-    // Try out a looser search query if nothing was found.
-    if ($variation && $loose = search_keywords_variation($keywords)) {
-      return do_search($loose, $type, $join, $where, false);
-    }
-    else {
-      return array();
-    }
+  // Second pass: only keep items that match the complicated keywords conditions (phrase search, negative keywords, ...)
+  $conditions = '('. $query[0] .')';
+  $arguments = array_merge($arguments2, $query[1]);
+  $result = db_query_temporary("SELECT i.type, i.sid, $select2 FROM temp_search_sids i INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type $join2 WHERE $conditions ORDER BY score DESC", $arguments, 'temp_search_results');
+  if (($count = db_result(db_query('SELECT COUNT(*) FROM temp_search_results'))) == 0) {
+    return array();
   }
   $count_query = "SELECT $count";
 
-  // Do pager query
-  $query = "SELECT i.type, i.sid, SUM(i.score/t.count) AS score FROM {search_index} i $join INNER JOIN {search_total} t ON i.word = t.word WHERE $conditions GROUP BY i.type, i.sid ORDER BY score DESC";
-  $result = pager_query($query, 15, 0, $count_query, $arguments);
-
+  // Do actual search query, paged 10 per page. NOTE(review): this query has no placeholders, so $arguments looks unneeded here - confirm.
+  $result = pager_query("SELECT * FROM temp_search_results", 10, 0, $count_query, $arguments);
   $results = array();
   while ($item = db_fetch_object($result)) {
     $results[] = $item->sid;
   }
-
   return $results;
 }
 
@@ -543,11 +806,12 @@ function search_view() {
   // Search form submits with POST but redirects to GET. This way we can keep
   // the search query URL clean as a whistle:
   // search/type/keyword+keyword
-  if ($_POST['edit']['keys']) {
+  if (isset($_POST['op'])) {
     if ($type == '') {
       $type = 'node';
     }
-    drupal_goto('search/'. urlencode($type) .'/'. urlencode($_POST['edit']['keys']));
+    $keys = module_invoke($type, 'search', 'post', $_POST['edit']['keys']);
+    drupal_goto('search/'. urlencode($type) .'/'. urlencode(is_null($keys) ? $_POST['edit']['keys'] : $keys));
   }
   else if ($type == '') {
     // Note: search/node can not be a default tab because it would take on the
@@ -647,21 +911,17 @@ function search_form($action = '', $keys = '', $type = null, $prompt = null) {
     $prompt = t('Enter your keywords');
   }
 
+  $form = array();
   $form['#action'] = $action;
-  $form['prompt'] = array('#type' => 'item', '#title' => $prompt);
-  $form['keys'] = array('#type' => 'textfield', '#title' => '', '#default_value' => $keys, '#size' => $prompt ? 40 : 30, '#maxlength' => 255);
-  $form['submit'] = array('#type' => 'submit', '#value' => t('Search'));
   $form['#attributes'] = array('class' => 'search-form');
+  $form['basic'] = array('#type' => 'item', '#title' => $prompt);
+  $form['basic']['inline'] = array('#type' => 'markup', '#prefix' => '<div class="container-inline">', '#suffix' => '</div>');
+  $form['basic']['inline']['keys'] = array('#type' => 'textfield', '#title' => '', '#default_value' => $keys, '#size' => $prompt ? 40 : 30, '#maxlength' => 255);
+  $form['basic']['inline']['submit'] = array('#type' => 'submit', '#value' => t('Search'));
 
-  return drupal_get_form('search_form', $form);
-}
+  $form = array_merge($form, module_invoke($type, 'search', 'form', $keys));
 
-function theme_search_form($form) {
-  $output  = form_render($form['prompt']);
-  $output .= '<div class="container-inline">';
-  $output .= form_render($form);
-  $output .= '</div>';
-  return $output;
+  return drupal_get_form('search_form', $form);
 }
 
 /**
@@ -692,8 +952,7 @@ function search_data($keys = NULL, $type = 'node') {
  * Used for formatting search results.
  *
  * @param $keys
- *   A string containing keywords. They are split into words using the same
- *   rules as search indexing.
+ *   A string containing a search query.
  *
  * @param $text
  *   The text to extract fragments from.
@@ -702,7 +961,11 @@ function search_data($keys = NULL, $type = 'node') {
  *   A string containing HTML for the excerpt.
  */
 function search_excerpt($keys, $text) {
-  $keys = search_keywords_split($keys);
+  // Extract positive keywords and phrases
+  preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' '. $keys, $matches);
+  $keys = array_merge($matches[2], $matches[3]);
+
+  // Prepare text
   $text = strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text));
   array_walk($keys, '_search_excerpt_replace');
   $workkeys = $keys;
@@ -718,6 +981,7 @@ function search_excerpt($keys, $text) {
     foreach ($workkeys as $k => $key) {
       if (strlen($key) == 0) {
         unset($workkeys[$k]);
+        unset($keys[$k]);
         continue;
       }
       if ($length >= 256) {