Commit c574afad authored by Dries's avatar Dries
Browse files

- Patch #394182 by chx, Berdir: converted search module to the new database...

- Patch #394182 by chx, Berdir: converted search module to the new database abstraction layer. Rest in peace old database layer.
parent 9ef1d496
......@@ -2408,10 +2408,15 @@ function comment_ranking() {
return array(
'comments' => array(
'title' => t('Number of comments'),
'join' => 'LEFT JOIN {node_comment_statistics} node_comment_statistics ON node_comment_statistics.nid = i.sid',
'join' => array(
'type' => 'LEFT',
'table' => 'node_comment_statistics',
'alias' => 'node_comment_statistics',
'on' => 'node_comment_statistics.nid = i.sid',
),
// Inverse law that maps the highest reply count on the site to 1 and 0 to 0.
'score' => '2.0 - 2.0 / (1.0 + node_comment_statistics.comment_count * CAST(%f AS DECIMAL))',
'arguments' => array(variable_get('node_cron_comments_scale', 0)),
'score' => '2.0 - 2.0 / (1.0 + node_comment_statistics.comment_count * CAST(:scale AS DECIMAL))',
'arguments' => array(':scale' => variable_get('node_cron_comments_scale', 0)),
),
);
}
......
......@@ -1228,37 +1228,26 @@ function node_permission() {
/**
* Gather the rankings from the hook_ranking implementations.
*
* @param $query
* A query object that has been extended with the Search DB Extender.
*/
function _node_rankings() {
$rankings = array(
'total' => 0, 'join' => array(), 'score' => array(), 'args' => array(),
);
function _node_rankings(SelectQueryExtender $query) {
if ($ranking = module_invoke_all('ranking')) {
$tables = &$query->getTables();
foreach ($ranking as $rank => $values) {
if ($node_rank = variable_get('node_rank_' . $rank, 0)) {
// If the table defined in the ranking isn't already joined, then add it.
if (isset($values['join']) && !isset($rankings['join'][$values['join']])) {
$rankings['join'][$values['join']] = $values['join'];
}
// Add the rankings weighted score multiplier value, handling NULL gracefully.
$rankings['score'][] = 'CAST(%f AS DECIMAL) * COALESCE((' . $values['score'] . '), 0)';
// Add the administrator's weighted score multiplier value for this ranking.
$rankings['total'] += $node_rank;
$rankings['arguments'][] = $node_rank;
// Add any additional arguments used by this ranking.
if (isset($values['arguments'])) {
$rankings['arguments'] = array_merge($rankings['arguments'], $values['arguments']);
if (isset($values['join']) && !isset($tables[$values['join']['alias']])) {
$query->addJoin($values['join']['type'], $values['join']['table'], $values['join']['alias'], $values['join']['on']);
}
$arguments = isset($values['arguments']) ? $values['arguments'] : array();
$query->addScore($values['score'], $arguments, $node_rank);
}
}
}
return $rankings;
}
/**
* Implement hook_search().
*/
......@@ -1305,61 +1294,35 @@ function node_search($op = 'search', $keys = NULL) {
case 'search':
// Build matching conditions
list($join1, $where1) = _db_rewrite_sql();
$arguments1 = array();
$conditions1 = 'n.status = 1';
if ($type = search_query_extract($keys, 'type')) {
$types = array();
foreach (explode(',', $type) as $t) {
$types[] = "n.type = '%s'";
$arguments1[] = $t;
}
$conditions1 .= ' AND (' . implode(' OR ', $types) . ')';
$keys = search_query_insert($keys, 'type');
}
if ($term = search_query_extract($keys, 'term')) {
$terms = array();
foreach (explode(',', $term) as $c) {
$terms[] = "tn.tid = %d";
$arguments1[] = $c;
}
$conditions1 .= ' AND (' . implode(' OR ', $terms) . ')';
$join1 .= ' INNER JOIN {taxonomy_term_node} tn ON n.vid = tn.vid';
$keys = search_query_insert($keys, 'term');
$query = db_select('search_index', 'i')->extend('SearchQuery')->extend('PagerDefault');
$query->join('node', 'n', 'n.nid = i.sid');
$query
->condition('n.status', 1)
->addTag('node_access')
->searchExpression($keys, 'node');
// Insert special keywords.
$query->setOption('type', 'n.type');
$query->setOption('language', 'n.language');
if ($query->setOption('term', 'tn.nid')) {
$query->join('taxonomy_term_node', 'tn', 'n.vid = tn.vid');
}
if ($languages = search_query_extract($keys, 'language')) {
$terms = array();
foreach (explode(',', $languages) as $l) {
$terms[] = "n.language = '%s'";
$arguments1[] = $l;
}
$conditions1 .= ' AND (' . implode(' OR ', $terms) . ')';
$keys = search_query_insert($keys, 'language');
// Only continue if the first pass query matches.
if (!$query->executeFirstPass()) {
return array();
}
// Get the ranking expressions.
$rankings = _node_rankings();
// Add the ranking expressions.
_node_rankings($query);
// When all search factors are disabled (i.e. they have a weight of zero),
// the default score is based only on keyword relevance.
if ($rankings['total'] == 0) {
$total = 1;
$arguments2 = array();
$join2 = '';
$select2 = 'SUM(i.relevance) AS calculated_score';
}
else {
$total = $rankings['total'];
$arguments2 = $rankings['arguments'];
$join2 = implode(' ', $rankings['join']);
$select2 = 'SUM(' . implode(' + ', $rankings['score']) . ') AS calculated_score';
}
// Do search.
$find = do_search($keys, 'node', 'INNER JOIN {node} n ON n.nid = i.sid ' . $join1, $conditions1 . (empty($where1) ? '' : ' AND ' . $where1), $arguments1, $select2, $join2, $arguments2);
// Add a count query.
$inner_query = clone $query;
$count_query = db_select($inner_query->fields('i', array('sid')));
$count_query->addExpression('COUNT(*)');
$query->setCountQuery($count_query);
$find = $query
->limit(10)
->execute();
// Load results.
$results = array();
......@@ -1370,9 +1333,9 @@ function node_search($op = 'search', $keys = NULL) {
$node->rendered = drupal_render($node->content);
// Fetch comments for snippet.
$node->rendered .= module_invoke('comment', 'node_update_index', $node);
$node->rendered .= ' ' . module_invoke('comment', 'node_update_index', $node);
// Fetch terms for snippet.
$node->rendered .= module_invoke('taxonomy', 'node_update_index', $node);
$node->rendered .= ' ' . module_invoke('taxonomy', 'node_update_index', $node);
$extra = module_invoke_all('node_search_result', $node);
......@@ -1384,7 +1347,7 @@ function node_search($op = 'search', $keys = NULL) {
'date' => $node->changed,
'node' => $node,
'extra' => $extra,
'score' => $total ? ($item->calculated_score / $total) : 0,
'score' => $item->calculated_score,
'snippet' => search_excerpt($keys, $node->rendered),
);
}
......@@ -1420,8 +1383,8 @@ function node_ranking() {
$ranking['recent'] = array(
'title' => t('Recently posted'),
// Exponential decay with half-life of 6 months, starting at last indexed node
'score' => 'POW(2.0, (GREATEST(n.created, n.changed) - %d) * 6.43e-8)',
'arguments' => array($node_cron_last),
'score' => 'POW(2.0, (GREATEST(n.created, n.changed) - :node_cron_last) * 6.43e-8)',
'arguments' => array(':node_cron_last' => $node_cron_last),
);
}
return $ranking;
......@@ -2082,15 +2045,15 @@ function node_search_validate($form, &$form_state) {
// checkboxes to 0.
$form_state['values']['type'] = array_filter($form_state['values']['type']);
if (count($form_state['values']['type'])) {
$keys = search_query_insert($keys, 'type', implode(',', array_keys($form_state['values']['type'])));
$keys = search_expression_insert($keys, 'type', implode(',', array_keys($form_state['values']['type'])));
}
}
if (isset($form_state['values']['term']) && is_array($form_state['values']['term'])) {
$keys = search_query_insert($keys, 'term', implode(',', $form_state['values']['term']));
$keys = search_expression_insert($keys, 'term', implode(',', $form_state['values']['term']));
}
if (isset($form_state['values']['language']) && is_array($form_state['values']['language'])) {
$keys = search_query_insert($keys, 'language', implode(',', array_filter($form_state['values']['language'])));
$keys = search_expression_insert($keys, 'language', implode(',', array_filter($form_state['values']['language'])));
}
if ($form_state['values']['or'] != '') {
if (preg_match_all('/ ("[^"]+"|[^" ]+)/i', ' ' . $form_state['values']['or'], $matches)) {
......
......@@ -29,13 +29,14 @@
* capabilities. To do this, node module also implements hook_update_index()
* which is used to create and maintain the index.
*
* We call do_search() with the keys, the module name, and extra SQL fragments
* to use when searching. See hook_update_index() for more information.
* We call db_select('search_index', 'i')->extend('SearchQuery') and then add
* the keys, the module name, and extra SQL fragments to use when searching.
* See hook_update_index() for more information.
*
* @param $op
* A string defining which operation to perform:
* - 'admin': The hook should return a form array containing any fieldsets the
* module wants to add to the Search settings page at
* module wants to add to the Search settings page at
* admin/config/search/settings.
* - 'name': The hook should return a translated name defining the type of
* items that are searched for with this module ('content', 'users', ...).
......@@ -77,12 +78,15 @@ function hook_search($op = 'search', $keys = NULL) {
return t('Content');
case 'reset':
db_query("UPDATE {search_dataset} SET reindex = %d WHERE type = 'node'", REQUEST_TIME);
db_update('search_dataset')
->fields(array('reindex' => REQUEST_TIME))
->condition('type', 'node')
->execute();
return;
case 'status':
$total = db_result(db_query('SELECT COUNT(*) FROM {node} WHERE status = 1'));
$remaining = db_result(db_query("SELECT COUNT(*) FROM {node} n LEFT JOIN {search_dataset} d ON d.type = 'node' AND d.sid = n.nid WHERE n.status = 1 AND d.sid IS NULL OR d.reindex <> 0"));
$total = db_query('SELECT COUNT(*) FROM {node} WHERE status = 1')->fetchField();
$remaining = db_query("SELECT COUNT(*) FROM {node} n LEFT JOIN {search_dataset} d ON d.type = 'node' AND d.sid = n.nid WHERE n.status = 1 AND d.sid IS NULL OR d.reindex <> 0")->fetchField();
return array('remaining' => $remaining, 'total' => $total);
case 'admin':
......@@ -111,61 +115,35 @@ function hook_search($op = 'search', $keys = NULL) {
case 'search':
// Build matching conditions
list($join1, $where1) = _db_rewrite_sql();
$arguments1 = array();
$conditions1 = 'n.status = 1';
if ($type = search_query_extract($keys, 'type')) {
$types = array();
foreach (explode(',', $type) as $t) {
$types[] = "n.type = '%s'";
$arguments1[] = $t;
}
$conditions1 .= ' AND (' . implode(' OR ', $types) . ')';
$keys = search_query_insert($keys, 'type');
}
if ($category = search_query_extract($keys, 'category')) {
$categories = array();
foreach (explode(',', $category) as $c) {
$categories[] = "tn.tid = %d";
$arguments1[] = $c;
}
$conditions1 .= ' AND (' . implode(' OR ', $categories) . ')';
$join1 .= ' INNER JOIN {taxonomy_term_node} tn ON n.vid = tn.vid';
$keys = search_query_insert($keys, 'category');
$query = db_search()->extend('PagerDefault');
$query->join('node', 'n', 'n.nid = i.sid');
$query
->condition('n.status', 1)
->addTag('node_access')
->searchExpression($keys, 'node');
// Insert special keywords.
$query->setOption('type', 'n.type');
$query->setOption('language', 'n.language');
if ($query->setOption('term', 'tn.nid')) {
$query->join('taxonomy_term_node', 'tn', 'n.vid = tn.vid');
}
if ($languages = search_query_extract($keys, 'language')) {
$categories = array();
foreach (explode(',', $languages) as $l) {
$categories[] = "n.language = '%s'";
$arguments1[] = $l;
}
$conditions1 .= ' AND (' . implode(' OR ', $categories) . ')';
$keys = search_query_insert($keys, 'language');
// Only continue if the first pass query matches.
if (!$query->executeFirstPass()) {
return array();
}
// Get the ranking expressions.
$rankings = _node_rankings();
// When all search factors are disabled (i.e. they have a weight of zero),
// the default score is based only on keyword relevance.
if ($rankings['total'] == 0) {
$total = 1;
$arguments2 = array();
$join2 = '';
$select2 = 'i.relevance AS score';
}
else {
$total = $rankings['total'];
$arguments2 = $rankings['arguments'];
$join2 = implode(' ', $rankings['join']);
$select2 = '(' . implode(' + ', $rankings['score']) . ') AS score';
}
// Add the ranking expressions.
_node_rankings($query);
// Do search.
$find = do_search($keys, 'node', 'INNER JOIN {node} n ON n.nid = i.sid ' . $join1, $conditions1 . (empty($where1) ? '' : ' AND ' . $where1), $arguments1, $select2, $join2, $arguments2);
// Add a count query.
$inner_query = clone $query;
$count_query = db_select($inner_query->fields('i', array('sid')));
$count_query->addExpression('COUNT(*)');
$query->setCountQuery($count_query);
$find = $query
->limit(10)
->execute();
// Load results.
$results = array();
......@@ -176,9 +154,9 @@ function hook_search($op = 'search', $keys = NULL) {
$node->body = drupal_render($node->content);
// Fetch comments for snippet.
$node->body .= module_invoke('comment', 'node', $node, 'update_index');
$node->rendered .= ' ' . module_invoke('comment', 'node_update_index', $node);
// Fetch terms for snippet.
$node->body .= module_invoke('taxonomy', 'node', $node, 'update_index');
$node->rendered .= ' ' . module_invoke('taxonomy', 'node_update_index', $node);
$extra = module_invoke_all('node_search_result', $node);
......@@ -190,7 +168,7 @@ function hook_search($op = 'search', $keys = NULL) {
'date' => $node->changed,
'node' => $node,
'extra' => $extra,
'score' => $total ? ($item->score / $total) : 0,
'score' => $item->calculated_score,
'snippet' => search_excerpt($keys, $node->body),
);
}
......
......@@ -9,3 +9,4 @@ files[] = search.admin.inc
files[] = search.pages.inc
files[] = search.install
files[] = search.test
files[] = search.extender.inc
......@@ -253,11 +253,20 @@ function search_reindex($sid = NULL, $type = NULL, $reindex = FALSE) {
module_invoke_all('search', 'reset');
}
else {
db_query("DELETE FROM {search_dataset} WHERE sid = %d AND type = '%s'", $sid, $type);
db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'", $sid, $type);
db_delete('search_dataset')
->condition('sid', $sid)
->condition('type', $type)
->execute();
db_delete('search_index')
->condition('sid', $sid)
->condition('type', $type)
->execute();
// Don't remove links if re-indexing.
if (!$reindex) {
db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type);
db_delete('search_node_links')
->condition('sid', $sid)
->condition('type', $type)
->execute();
}
}
}
......@@ -297,20 +306,29 @@ function search_cron() {
* up to date (even if cron times out or otherwise fails).
*/
function search_update_totals() {
// Update word IDF (Inverse Document Frequency) counts for new/changed words
// Update word IDF (Inverse Document Frequency) counts for new/changed words.
foreach (search_dirty() as $word => $dummy) {
// Get total count
$total = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $word));
// Apply Zipf's law to equalize the probability distribution
$total = db_query("SELECT SUM(score) FROM {search_index} WHERE word = :word", array(':word' => $word))->fetchField();
// Apply Zipf's law to equalize the probability distribution.
$total = log10(1 + 1/(max(1, $total)));
db_merge('search_total')->key(array('word' => $word))->fields(array('count' => $total))->execute();
db_merge('search_total')
->key(array('word' => $word))
->fields(array('count' => $total))
->execute();
}
// Find words that were deleted from search_index, but are still in
// search_total. We use a LEFT JOIN between the two tables and keep only the
// rows which fail to join.
$result = db_query("SELECT t.word AS realword, i.word FROM {search_total} t LEFT JOIN {search_index} i ON t.word = i.word WHERE i.word IS NULL");
while ($word = db_fetch_object($result)) {
db_query("DELETE FROM {search_total} WHERE word = '%s'", $word->realword);
$or = db_or();
foreach ($result as $word) {
$or->condition('word', $word->realword);
}
if (count($or) > 0) {
db_delete('search_total')
->condition($or)
->execute();
}
}
......@@ -583,27 +601,40 @@ function search_index($sid, $type, $text) {
search_reindex($sid, $type, TRUE);
// Insert cleaned up data into dataset
db_query("INSERT INTO {search_dataset} (sid, type, data, reindex) VALUES (%d, '%s', '%s', %d)", $sid, $type, $accum, 0);
db_insert('search_dataset')
->fields(array(
'sid' => $sid,
'type' => $type,
'data' => $accum,
'reindex' => 0,
))
->execute();
// Insert results into search index
foreach ($results[0] as $word => $score) {
// If a word already exists in the database, its score gets increased
// appropriately. If not, we create a new record with the appropriate
// starting score.
db_merge('search_index')->key(array(
'word' => $word,
'sid' => $sid,
'type' => $type,
))->fields(array('score' => $score))->expression('score', 'score + :score', array(':score' => $score))
->execute();
db_merge('search_index')
->key(array(
'word' => $word,
'sid' => $sid,
'type' => $type,
))
->fields(array('score' => $score))
->expression('score', 'score + :score', array(':score' => $score))
->execute();
search_dirty($word);
}
unset($results[0]);
// Get all previous links from this item.
$result = db_query("SELECT nid, caption FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type);
$result = db_query("SELECT nid, caption FROM {search_node_links} WHERE sid = :sid AND type = :type", array(
':sid' => $sid,
':type' => $type
));
$links = array();
while ($link = db_fetch_object($result)) {
foreach ($result as $link) {
$links[$link->nid] = $link->caption;
}
......@@ -613,7 +644,12 @@ function search_index($sid, $type, $text) {
if (isset($links[$nid])) {
if ($links[$nid] != $caption) {
// Update the existing link and mark the node for reindexing.
db_query("UPDATE {search_node_links} SET caption = '%s' WHERE sid = %d AND type = '%s' AND nid = %d", $caption, $sid, $type, $nid);
db_update('search_node_links')
->fields(array('caption' => $caption))
->condition('sid', $sid)
->condition('type', $type)
->condition('nid', $nid)
->execute();
search_touch_node($nid);
}
// Unset the link to mark it as processed.
......@@ -621,13 +657,24 @@ function search_index($sid, $type, $text) {
}
else {
// Insert the new link and mark the node for reindexing.
db_query("INSERT INTO {search_node_links} (caption, sid, type, nid) VALUES ('%s', %d, '%s', %d)", $caption, $sid, $type, $nid);
db_insert('search_node_links')
->fields(array(
'caption' => $caption,
'sid' => $sid,
'type' => $type,
'nid' => $nid,
))
->execute();
search_touch_node($nid);
}
}
// Any left-over links in $links no longer exist. Delete them and mark the nodes for reindexing.
foreach ($links as $nid => $caption) {
db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s' AND nid = %d", $sid, $type, $nid);
db_delete('search_node_links')
->condition('sid', $sid)
->condition('type', $type)
->condition('nid', $nid)
->execute();
search_touch_node($nid);
}
}
......@@ -639,7 +686,11 @@ function search_index($sid, $type, $text) {
* The nid of the node that needs reindexing.
*/
function search_touch_node($nid) {
// Flag the 'node'-type search_dataset row whose sid equals $nid for
// reindexing by stamping its reindex column with REQUEST_TIME.
// NOTE(review): the raw db_query() call and the db_update() builder below
// target the same row with the same value; they look like the removed and
// added sides of a diff shown together -- confirm that only the db_update()
// form should remain.
db_query("UPDATE {search_dataset} SET reindex = %d WHERE sid = %d AND type = 'node'", REQUEST_TIME, $nid);
db_update('search_dataset')
->fields(array('reindex' => REQUEST_TIME))
->condition('type', 'node')
->condition('sid', $nid)
->execute();
}
/**
......@@ -647,9 +698,9 @@ function search_touch_node($nid) {
*/
function search_node_update_index($node) {
// Transplant links to a node into the target node.
$result = db_query("SELECT caption FROM {search_node_links} WHERE nid = %d", $node->nid);
$result = db_query("SELECT caption FROM {search_node_links} WHERE nid = :nid", array(':nid' => $node->nid));
$output = array();
while ($link = db_fetch_object($result)) {
foreach ($result as $link) {
$output[] = $link->caption;
}
if (count($output)) {
......@@ -709,7 +760,7 @@ function search_comment_unpublish($comment) {
/**
* Extract a module-specific search option from a search query. e.g. 'type:book'
*/
function search_query_extract($keys, $option) {
function search_expression_extract($keys, $option) {
if (preg_match('/(^| )' . $option . ':([^ ]*)( |$)/i', $keys, $matches)) {
return $matches[2];
}
......@@ -719,8 +770,8 @@ function search_query_extract($keys, $option) {
* Return a query with the given module-specific search option inserted in.
* e.g. 'type:book'.
*/
function search_query_insert($keys, $option, $value = '') {
if (search_query_extract($keys, $option)) {
function search_expression_insert($keys, $option, $value = '') {
if (search_expression_extract($keys, $option)) {
$keys = trim(preg_replace('/(^| )' . $option . ':[^ ]*/i', '', $keys));
}
if ($value != '') {
......@@ -729,281 +780,6 @@ function search_query_insert($keys, $option, $value = '') {
return $keys;
}
/**
* Parse a search query into SQL conditions.
*
* We build two queries that match the dataset bodies. See do_search() for
* more about these.
*
* @param $text
* The search keys.
* @return
* A list of six elements.
* * A series of statements AND'd together which will be used to provide all
* possible matches.
* * Arguments for this query part.
* * A series of exact word matches OR'd together.
* * Arguments for this query part.
* * A boolean indicating whether this is a simple query or not. Negative
* terms, presence of both AND / OR make this FALSE.
* * A boolean indicating the presence of a lowercase or. Maybe the user
* wanted to use OR.
*/
function search_parse_query($text) {
$keys = array('positive' => array(), 'negative' => array());
// Tokenize query string
preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $text, $matches, PREG_SET_ORDER);
if (count($matches) < 1) {
return NULL;
}
// Classify tokens
$or = FALSE;
$warning = '';
$simple = TRUE;
foreach ($matches as $match) {
$phrase = FALSE;
// Strip off phrase quotes
if ($match[2]{0} == '"') {
$match[2] = substr($match[2], 1, -1);
$phrase = TRUE;
$simple = FALSE;
}
// Simplify keyword according to indexing rules and external preprocessors
$words = search_simplify($match[2]);
// Re-explode in case simplification added more words, except when matching a phrase
$words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);
// Negative matches
if ($match[1] == '-') {
$keys['negative'] = array_merge($keys['negative'], $words);
}
// OR operator: instead of a single keyword, we store an array of all
// OR'd keywords.
elseif ($match[2] == 'OR' && count($keys['positive'])) {
$last = array_pop($keys['positive']);
// Starting a new OR?
if (!is_array($last)) {
$last = array($last);