search.module 46.3 KB
Newer Older
Dries Buytaert's avatar
 
Dries Buytaert committed
1
<?php
2
// $Id$
Dries Buytaert's avatar
 
Dries Buytaert committed
3

Dries Buytaert's avatar
   
Dries Buytaert committed
4
5
6
7
8
/**
 * @file
 * Enables site-wide keyword searching.
 */

9
/**
10
 * Matches Unicode character classes to exclude from the search index.
Steven Wittens's avatar
Steven Wittens committed
11
 *
12
13
 * See: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
 *
14
 * The index only contains the following character classes:
15
16
17
18
19
20
21
 * Lu     Letter, Uppercase
 * Ll     Letter, Lowercase
 * Lt     Letter, Titlecase
 * Lo     Letter, Other
 * Nd     Number, Decimal Digit
 * No     Number, Other
 */
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
// The value is the body of a PCRE character class, without the enclosing
// brackets. In this file it is used as
// '/['. PREG_CLASS_SEARCH_EXCLUDE .']+/u' (see search_simplify()), where every
// run of matching characters is replaced by a single space.
define('PREG_CLASS_SEARCH_EXCLUDE',
'\x{0}-\x{2f}\x{3a}-\x{40}\x{5b}-\x{60}\x{7b}-\x{bf}\x{d7}\x{f7}\x{2b0}-'.
'\x{385}\x{387}\x{3f6}\x{482}-\x{489}\x{559}-\x{55f}\x{589}-\x{5c7}\x{5f3}-'.
'\x{61f}\x{640}\x{64b}-\x{65e}\x{66a}-\x{66d}\x{670}\x{6d4}\x{6d6}-\x{6ed}'.
'\x{6fd}\x{6fe}\x{700}-\x{70f}\x{711}\x{730}-\x{74a}\x{7a6}-\x{7b0}\x{901}-'.
'\x{903}\x{93c}\x{93e}-\x{94d}\x{951}-\x{954}\x{962}-\x{965}\x{970}\x{981}-'.
'\x{983}\x{9bc}\x{9be}-\x{9cd}\x{9d7}\x{9e2}\x{9e3}\x{9f2}-\x{a03}\x{a3c}-'.
'\x{a4d}\x{a70}\x{a71}\x{a81}-\x{a83}\x{abc}\x{abe}-\x{acd}\x{ae2}\x{ae3}'.
'\x{af1}-\x{b03}\x{b3c}\x{b3e}-\x{b57}\x{b70}\x{b82}\x{bbe}-\x{bd7}\x{bf0}-'.
'\x{c03}\x{c3e}-\x{c56}\x{c82}\x{c83}\x{cbc}\x{cbe}-\x{cd6}\x{d02}\x{d03}'.
'\x{d3e}-\x{d57}\x{d82}\x{d83}\x{dca}-\x{df4}\x{e31}\x{e34}-\x{e3f}\x{e46}-'.
'\x{e4f}\x{e5a}\x{e5b}\x{eb1}\x{eb4}-\x{ebc}\x{ec6}-\x{ecd}\x{f01}-\x{f1f}'.
'\x{f2a}-\x{f3f}\x{f71}-\x{f87}\x{f90}-\x{fd1}\x{102c}-\x{1039}\x{104a}-'.
'\x{104f}\x{1056}-\x{1059}\x{10fb}\x{10fc}\x{135f}-\x{137c}\x{1390}-\x{1399}'.
'\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16f0}\x{1712}-\x{1714}'.
'\x{1732}-\x{1736}\x{1752}\x{1753}\x{1772}\x{1773}\x{17b4}-\x{17db}\x{17dd}'.
'\x{17f0}-\x{180e}\x{1843}\x{18a9}\x{1920}-\x{1945}\x{19b0}-\x{19c0}\x{19c8}'.
'\x{19c9}\x{19de}-\x{19ff}\x{1a17}-\x{1a1f}\x{1d2c}-\x{1d61}\x{1d78}\x{1d9b}-'.
'\x{1dc3}\x{1fbd}\x{1fbf}-\x{1fc1}\x{1fcd}-\x{1fcf}\x{1fdd}-\x{1fdf}\x{1fed}-'.
'\x{1fef}\x{1ffd}-\x{2070}\x{2074}-\x{207e}\x{2080}-\x{2101}\x{2103}-\x{2106}'.
'\x{2108}\x{2109}\x{2114}\x{2116}-\x{2118}\x{211e}-\x{2123}\x{2125}\x{2127}'.
'\x{2129}\x{212e}\x{2132}\x{213a}\x{213b}\x{2140}-\x{2144}\x{214a}-\x{2b13}'.
'\x{2ce5}-\x{2cff}\x{2d6f}\x{2e00}-\x{3005}\x{3007}-\x{303b}\x{303d}-\x{303f}'.
'\x{3099}-\x{309e}\x{30a0}\x{30fb}-\x{30fe}\x{3190}-\x{319f}\x{31c0}-\x{31cf}'.
'\x{3200}-\x{33ff}\x{4dc0}-\x{4dff}\x{a015}\x{a490}-\x{a716}\x{a802}\x{a806}'.
'\x{a80b}\x{a823}-\x{a82b}\x{d800}-\x{f8ff}\x{fb1e}\x{fb29}\x{fd3e}\x{fd3f}'.
'\x{fdfc}-\x{fe6b}\x{feff}-\x{ff0f}\x{ff1a}-\x{ff20}\x{ff3b}-\x{ff40}\x{ff5b}-'.
'\x{ff65}\x{ff70}\x{ff9e}\x{ff9f}\x{ffe0}-\x{fffd}');
50
51

/**
Steven Wittens's avatar
Steven Wittens committed
52
 * Matches all 'N' Unicode character classes (numbers)
53
 */
54
55
56
57
58
59
60
61
62
63
// The value is the body of a PCRE character class, without the enclosing
// brackets. search_simplify() embeds it in a /u pattern to find numeric
// characters on either side of a run of punctuation.
define('PREG_CLASS_NUMBERS',
'\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}'.
'\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}'.
'\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}'.
'\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-'.
'\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}'.
'\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}'.
'\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}'.
'\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-'.
'\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}');
64
65

/**
Steven Wittens's avatar
Steven Wittens committed
66
 * Matches all 'P' Unicode character classes (punctuation)
67
 */
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
// The value is the body of a PCRE character class, without the enclosing
// brackets. search_simplify() embeds it in a /u pattern to drop punctuation
// that sits between two numeric characters.
define('PREG_CLASS_PUNCTUATION',
'\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}'.
'\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}'.
'\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}'.
'\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}'.
'\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}'.
'\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}'.
'\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}'.
'\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}'.
'\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-'.
'\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}'.
'\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}'.
'\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}'.
'\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}'.
'\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-'.
'\x{ff65}');

/**
 * Matches all CJK characters that are candidates for auto-splitting
 * (Chinese, Japanese, Korean).
 *
 * Contains kana and BMP ideographs. The value is the body of a PCRE character
 * class without the enclosing brackets: search_simplify() wraps it as
 * '/['. PREG_CLASS_CJK .']+/u' and passes every matched run to
 * search_expand_cjk().
 */
define('PREG_CLASS_CJK', '\x{3041}-\x{30ff}\x{31f0}-\x{31ff}\x{3400}-\x{4db5}'.
'\x{4e00}-\x{9fbb}\x{f900}-\x{fad9}');
92

Dries Buytaert's avatar
   
Dries Buytaert committed
93
94
95
/**
 * Implementation of hook_help().
 *
 * @param $section
 *   Identifier of the help section being requested.
 * @return
 *   The help text for the sections this module handles ('admin/help#search',
 *   'admin/settings/search' and 'search#noresults'); nothing otherwise.
 */
function search_help($section) {
  // Full help page: three paragraphs, each wrapped in its own <p> element.
  if ($section == 'admin/help#search') {
    $paragraphs = array();
    $paragraphs[] = t('The search module adds the ability to search for content by keywords. Search is often the only practical way to find content on a large site. Search is useful for finding users and posts by searching on keywords.');
    $paragraphs[] = t('The search engine works by maintaining an index of the words in your site\'s content. It indexes the posts and users. You can adjust the settings to tweak the indexing behaviour. Note that the search requires cron to be set up correctly. The index percentage sets the maximum amount of items that will be indexed in one cron run. Set this number lower if your cron is timing out or if PHP is running out of memory.');
    $paragraphs[] = t('For more information please read the configuration and customization handbook <a href="@search">Search page</a>.', array('@search' => 'http://drupal.org/handbook/modules/search/'));
    return '<p>'. implode('</p><p>', $paragraphs) .'</p>';
  }

  // Short blurb shown on top of the settings page.
  if ($section == 'admin/settings/search') {
    $blurb = t('The search engine works by maintaining an index of the words in your site\'s content. You can adjust the settings below to tweak the indexing behaviour. Note that the search requires cron to be set up correctly.');
    return '<p>'. $blurb .'</p>';
  }

  // Suggestions shown when a search yields no results.
  if ($section == 'search#noresults') {
    return t('<ul>
<li>Check if your spelling is correct.</li>
<li>Remove quotes around phrases to match each word individually: <em>"blue smurf"</em> will match less than <em>blue smurf</em>.</li>
<li>Consider loosening your query with <em>OR</em>: <em>blue smurf</em> will match less than <em>blue OR smurf</em>.</li>
</ul>');
  }
}
Kjartan Mannes's avatar
Kjartan Mannes committed
113
114

/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names defined by the search module.
 */
function search_perm() {
  $permissions = array();
  $permissions[] = 'search content';
  $permissions[] = 'use advanced search';
  $permissions[] = 'administer search';
  return $permissions;
}

121
122
123
124
125
126
127
128
/**
 * Implementation of hook_block().
 *
 * @param $op
 *   The block operation; 'list' and 'view' are handled here.
 * @param $delta
 *   Block identifier. Not used: this module exposes a single block.
 * @return
 *   For 'list', the block definitions. For 'view', the block content and
 *   subject, provided the user has the 'search content' permission. Nothing
 *   in every other case.
 */
function search_block($op = 'list', $delta = 0) {
  switch ($op) {
    case 'list':
      return array(0 => array('info' => t('Search form')));

    case 'view':
      // The search form is only shown to users who are allowed to search.
      if (user_access('search content')) {
        return array(
          'content' => drupal_get_form('search_block_form'),
          'subject' => t('Search'),
        );
      }
      break;
  }
}

Dries Buytaert's avatar
   
Dries Buytaert committed
136
137
138
/**
 * Implementation of hook_menu().
 *
 * @param $may_cache
 *   TRUE when the cacheable menu items are requested, FALSE when the dynamic
 *   (per-request) items are requested.
 * @return
 *   An array of menu item definitions.
 */
function search_menu($may_cache) {
  $items = array();

  if ($may_cache) {
    // The search page itself; defaults to searching nodes.
    $items[] = array(
      'path' => 'search',
      'title' => t('Search'),
      'callback' => 'search_view',
      'callback arguments' => array('node'),
      'access' => user_access('search content'),
      'type' => MENU_SUGGESTED_ITEM,
    );
    // Administrative settings form.
    $items[] = array(
      'path' => 'admin/settings/search',
      'title' => t('Search settings'),
      'description' => t('Configure relevance settings for search and other indexing options'),
      'callback' => 'drupal_get_form',
      'callback arguments' => array('search_admin_settings'),
      'access' => user_access('administer search'),
      'type' => MENU_NORMAL_ITEM,
    );
    // Confirmation form for re-indexing the site.
    $items[] = array(
      'path' => 'admin/settings/search/wipe',
      'title' => t('Clear index'),
      'callback' => 'drupal_get_form',
      'callback arguments' => array('search_wipe_confirm'),
      'access' => user_access('administer search'),
      'type' => MENU_CALLBACK,
    );
    // Log report of the most frequent search phrases.
    $items[] = array(
      'path' => 'admin/logs/search',
      'title' => t('Top search phrases'),
      'description' => t('View most popular search phrases.'),
      'callback' => 'watchdog_top',
      'callback arguments' => array('search'),
    );
    return $items;
  }

  // Dynamic items are only needed while on a search page.
  if (arg(0) != 'search') {
    return $items;
  }

  // To remember the user's search keywords when switching across tabs,
  // we dynamically add the keywords to the search tabs' paths.
  $keys = search_get_keys();
  if (strlen($keys)) {
    $suffix = '/'. $keys;
  }
  else {
    $suffix = '';
  }

  foreach (module_list() as $name) {
    // Only modules that implement hook_search() and report a tab name get a
    // tab of their own.
    if (!module_hook($name, 'search')) {
      continue;
    }
    $title = module_invoke($name, 'search', 'name');
    if (!$title) {
      continue;
    }
    $items[] = array(
      'path' => 'search/'. $name . $suffix,
      'title' => $title,
      'callback' => 'search_view',
      'callback arguments' => array($name),
      'access' => user_access('search content'),
      'type' => MENU_LOCAL_TASK,
    );
  }

  return $items;
}

/**
 * Validate callback for the search settings form.
 *
 * Redirects to the re-index confirmation page when the 'Re-index site' button
 * was pressed, and wipes the index when a setting that affects indexing has
 * been changed.
 *
 * @param $form_id
 *   The form identifier (unused).
 * @param $form_values
 *   The submitted form values.
 */
function search_admin_settings_validate($form_id, $form_values) {
  $reindex_requested = ($form_values['op'] == t('Re-index site'));
  if ($reindex_requested) {
    drupal_goto('admin/settings/search/wipe');
  }

  // If these settings change, the index needs to be rebuilt.
  $rebuild = (variable_get('minimum_word_size', 3) != $form_values['minimum_word_size']);
  if (!$rebuild) {
    $rebuild = (variable_get('overlap_cjk', TRUE) != $form_values['overlap_cjk']);
  }

  if ($rebuild) {
    drupal_set_message(t('The index will be rebuilt.'));
    search_wipe();
  }
}
Dries Buytaert's avatar
   
Dries Buytaert committed
200

201
202
203
/**
 * Menu callback; displays the search module settings page.
 *
 * Builds the settings form: an indexing status summary gathered from every
 * module implementing hook_search(), the cron throttle, the indexing options
 * and any settings contributed by other modules.
 *
 * @return
 *   The form array, passed through system_settings_form().
 */
function search_admin_settings() {
  // Collect some stats
  $remaining = 0;
  $total = 0;
  foreach (module_list() as $module) {
    if (!module_hook($module, 'search')) {
      continue;
    }
    $module_status = module_invoke($module, 'search', 'status');
    $remaining += $module_status['remaining'];
    $total += $module_status['total'];
  }

  $count = format_plural($remaining, 'There is 1 item left to index.', 'There are @count items left to index.');
  // max(1, ...) avoids a division by zero when nothing is indexable;
  // min(100, ...) caps the displayed value.
  $ratio = 100 * ($total - $remaining) / max(1, $total);
  $percentage = ((int)min(100, $ratio)) . '%';
  $status = '<p><strong>'. t('%percentage of the site has been indexed.', array('%percentage' => $percentage)) .' '. $count .'</strong></p>';

  // Indexing status:
  $form['status'] = array(
    '#type' => 'fieldset',
    '#title' => t('Indexing status'),
  );
  $form['status']['status'] = array('#value' => $status);
  $form['status']['wipe'] = array(
    '#type' => 'submit',
    '#value' => t('Re-index site'),
  );

  $items = drupal_map_assoc(array(10, 20, 50, 100, 200, 500));

  // Indexing throttle:
  $form['indexing_throttle'] = array(
    '#type' => 'fieldset',
    '#title' => t('Indexing throttle'),
  );
  $form['indexing_throttle']['search_cron_limit'] = array(
    '#type' => 'select',
    '#title' => t('Items to index per cron run'),
    '#default_value' => variable_get('search_cron_limit', 100),
    '#options' => $items,
    '#description' => t('The maximum amount of items that will be indexed in one cron run. Set this number lower if your cron is timing out or if PHP is running out of memory.'),
  );

  // Indexing settings:
  $form['indexing_settings'] = array(
    '#type' => 'fieldset',
    '#title' => t('Indexing settings'),
  );
  $form['indexing_settings']['info'] = array(
    '#value' => '<em>'. t('<p>Changing the settings below will cause the site index to be rebuilt. The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed.</p><p>The default settings should be appropriate for the majority of sites.</p>') .'</em>',
  );
  $form['indexing_settings']['minimum_word_size'] = array(
    '#type' => 'textfield',
    '#title' => t('Minimum word length to index'),
    '#default_value' => variable_get('minimum_word_size', 3),
    '#size' => 5,
    '#maxlength' => 3,
    '#description' => t('The number of characters a word has to be to be indexed. A lower setting means better search result ranking, but also a larger database. Each search query must contain at least one keyword that is this size (or longer).'),
  );
  $form['indexing_settings']['overlap_cjk'] = array(
    '#type' => 'checkbox',
    '#title' => t('Simple CJK handling'),
    '#default_value' => variable_get('overlap_cjk', TRUE),
    '#description' => t('Whether to apply a simple Chinese/Japanese/Korean tokenizer based on overlapping sequences. Turn this off if you want to use an external preprocessor for this instead. Does not affect other languages.'),
  );

  // Per module settings
  $module_settings = module_invoke_all('search', 'admin');
  $form = array_merge($form, $module_settings);

  return system_settings_form($form);
}

239
240
241
242
/**
 * Menu callback: confirm wiping of the index.
 *
 * @return
 *   The confirmation form built by confirm_form(); cancelling leads back to
 *   'admin/settings/search'.
 */
function search_wipe_confirm() {
  $question = t('Are you sure you want to re-index the site?');
  $description = t(' The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed. This action cannot be undone.');
  $confirm_label = t('Re-index site');
  $cancel_label = t('Cancel');

  return confirm_form(array(), $question, 'admin/settings/search', $description, $confirm_label, $cancel_label);
}

/**
 * Handler for wipe confirmation.
 *
 * @param $form_id
 *   The form identifier (unused).
 * @param $form
 *   The submitted values; the index is only wiped when 'confirm' is set.
 * @return
 *   The path to redirect to after wiping, or nothing when not confirmed.
 */
function search_wipe_confirm_submit($form_id, &$form) {
  if (!$form['confirm']) {
    return;
  }

  search_wipe();
  drupal_set_message(t('The index will be rebuilt.'));
  return 'admin/settings/search';
}

Dries Buytaert's avatar
Dries Buytaert committed
258
/**
 * Wipes a part of or the entire search index.
 *
 * Called without arguments, no rows are deleted here; instead every module is
 * asked to reset its indexing state through hook_search('reset').
 *
 * @param $sid
 *  (optional) The SID of the item to wipe. If specified, $type must be passed
 *  too.
 * @param $type
 *  (optional) The type of item to wipe.
 * @param $reindex
 *  (optional) When TRUE, index rows for this item that originate from links
 *  in other items (fromsid other than 0) are kept.
 */
function search_wipe($sid = NULL, $type = NULL, $reindex = FALSE) {
  if ($type == NULL && $sid == NULL) {
    module_invoke_all('search', 'reset');
    return;
  }

  db_query("DELETE FROM {search_dataset} WHERE sid = %d AND type = '%s'", $sid, $type);
  db_query("DELETE FROM {search_index} WHERE fromsid = %d AND fromtype = '%s'", $sid, $type);

  // When re-indexing, keep link references
  $delete_own_words = "DELETE FROM {search_index} WHERE sid = %d AND type = '%s'";
  if ($reindex) {
    $delete_own_words .= " AND fromsid = 0";
  }
  db_query($delete_own_words, $sid, $type);
}

279
280
281
282
283
/**
 * Marks a word as dirty (or retrieves the list of dirty words). This is used
 * during indexing (cron). Words which are dirty have outdated total counts in
 * the search_total table, and need to be recounted.
 *
 * @param $word
 *   (optional) The word to mark as dirty. Omit to retrieve the list instead.
 * @return
 *   When called without a word: an array keyed by the dirty words collected
 *   during this request. Nothing when a word is being marked.
 */
function search_dirty($word = NULL) {
  static $dirty = array();

  // No argument: the caller wants the list collected so far.
  if ($word === NULL) {
    return $dirty;
  }

  $dirty[$word] = TRUE;
}

Kjartan Mannes's avatar
Kjartan Mannes committed
294
/**
 * Implementation of hook_cron().
 *
 * Fires hook_update_index() in all modules and cleans up dirty words (see
 * search_dirty).
 */
function search_cron() {
  // The totals are refreshed from a shutdown function so that search_total is
  // always brought up to date.
  register_shutdown_function('search_update_totals');

  // Update word index
  $modules = module_list();
  foreach ($modules as $module) {
    module_invoke($module, 'update_index');
  }
}

/**
 * This function is called on shutdown to ensure that search_total is always
 * up to date (even if cron times out or otherwise fails).
 *
 * Recomputes the stored count for every word reported by search_dirty() and
 * removes search_total rows whose word no longer occurs in search_index.
 */
function search_update_totals() {
  // Update word IDF (Inverse Document Frequency) counts for new/changed words
  $dirty_words = array_keys(search_dirty());
  foreach ($dirty_words as $word) {
    // Get total count
    $sum = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $word));
    // Apply Zipf's law to equalize the probability distribution
    $count = log10(1 + 1/(max(1, $sum)));

    // Update the existing row; insert one only when the update touched none.
    db_query("UPDATE {search_total} SET count = %f WHERE word = '%s'", $count, $word);
    if (db_affected_rows()) {
      continue;
    }
    db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %f)", $word, $count);
  }

  // Find words that were deleted from search_index, but are still in
  // search_total. We use a LEFT JOIN between the two tables and keep only the
  // rows which fail to join.
  $orphans = db_query("SELECT t.word AS realword, i.word FROM {search_total} t LEFT JOIN {search_index} i ON t.word = i.word WHERE i.word IS NULL");
  while ($row = db_fetch_object($orphans)) {
    db_query("DELETE FROM {search_total} WHERE word = '%s'", $row->realword);
  }
}

/**
 * Simplifies a string according to indexing rules.
 *
 * @param $text
 *   The text to simplify.
 * @return
 *   The lowercased text after preprocessing, optional CJK splitting and
 *   number/punctuation normalisation, with every run of excluded characters
 *   replaced by a single space.
 */
function search_simplify($text) {
  // Decode entities to UTF-8, then lowercase.
  $text = drupal_strtolower(decode_entities($text));

  // Call an external processor for word handling.
  search_preprocess($text);

  // Simple CJK handling
  if (variable_get('overlap_cjk', TRUE)) {
    $cjk_run = '/['. PREG_CLASS_CJK .']+/u';
    $text = preg_replace_callback($cjk_run, 'search_expand_cjk', $text);
  }

  // To improve searching for numerical data such as dates, IP addresses
  // or version numbers, we consider a group of numerical characters
  // separated only by punctuation characters to be one piece.
  // This also means that searching for e.g. '20/03/1984' also returns
  // results with '20-03-1984' in them.
  // Readable regexp: ([number]+)[punctuation]+(?=[number])
  $number_glue = '/(['. PREG_CLASS_NUMBERS .']+)['. PREG_CLASS_PUNCTUATION .']+(?=['. PREG_CLASS_NUMBERS .'])/u';
  $text = preg_replace($number_glue, '\1', $text);

  // The dot, underscore and dash are simply removed. This allows meaningful
  // search behaviour with acronyms and URLs.
  $text = preg_replace('/[._-]+/', '', $text);

  // With the exception of the rules above, we consider all punctuation,
  // marks, spacers, etc, to be a word boundary.
  return preg_replace('/['. PREG_CLASS_SEARCH_EXCLUDE . ']+/u', ' ', $text);
}

/**
 * Basic CJK tokenizer. Simply splits a string into consecutive, overlapping
 * sequences of characters ('minimum_word_size' long).
 *
 * Used as a preg_replace_callback() callback by search_simplify().
 *
 * @param $matches
 *   The regular expression match; $matches[0] is the run of CJK characters.
 * @return
 *   The space-separated tokens, with a leading and a trailing space.
 */
function search_expand_cjk($matches) {
  $window_size = variable_get('minimum_word_size', 3);
  $remaining = $matches[0];
  $length = drupal_strlen($remaining);

  // Passthrough short words
  if ($length <= $window_size) {
    return ' '. $remaining .' ';
  }

  $output = ' ';
  // Sliding window holding the most recent characters.
  $window = array();
  $position = 0;
  while ($position < $length) {
    // Take the next character off the front of the string. The character is
    // picked with the multibyte-aware helper, then removed by its byte length.
    $char = drupal_substr($remaining, 0, 1);
    $remaining = substr($remaining, strlen($char));
    $window[] = $char;

    // Once the window is full, emit it and drop its oldest character.
    if ($position >= $window_size - 1) {
      $output .= implode('', $window) .' ';
      array_shift($window);
    }
    ++$position;
  }

  return $output;
}

/**
 * Splits a string into tokens for indexing.
 *
 * The result of the most recent call is remembered, so splitting the same
 * string twice in a row does the work only once.
 *
 * @param $text
 *   The text to split.
 * @return
 *   An array of simplified words, each truncated by _search_index_truncate().
 */
function search_index_split($text) {
  static $last = NULL;
  static $lastsplit = NULL;

  // The cache is keyed on the raw input and compared strictly. A loose
  // comparison would treat '' as equal to the uninitialised cache, and would
  // treat distinct numeric strings such as '1e3' and '1000' as the same text.
  if ($last !== NULL && $last === $text) {
    return $lastsplit;
  }

  // Process words
  $words = explode(' ', search_simplify($text));
  array_walk($words, '_search_index_truncate');

  // Save last keyword result, keyed by the text as it was passed in.
  $last = $text;
  $lastsplit = $words;

  return $words;
}

424
/**
 * Helper function for array_walk in search_index_split.
 *
 * @param $text
 *   The word to shorten, modified in place via truncate_utf8() with a limit
 *   of 50.
 */
function _search_index_truncate(&$text) {
  $limit = 50;
  $shortened = truncate_utf8($text, $limit);
  $text = $shortened;
}

431
432
433
434
/**
 * Invokes hook_search_preprocess() in modules.
 *
 * @param $text
 *   The text to preprocess. It is modified in place: each implementing module
 *   receives the output of the previous one.
 */
function search_preprocess(&$text) {
  $implementations = module_implements('search_preprocess');
  foreach ($implementations as $module) {
    $text = module_invoke($module, 'search_preprocess', $text);
  }
}

/**
 * Update the full-text search index for a particular item.
 *
 * The text is split into HTML tags and plain text. Words are scored according
 * to the tags they appear in, and words inside links to nodes on this site are
 * additionally recorded against the linked node.
 *
 * @param $sid
 *   A number identifying this particular item (e.g. node id).
 *
 * @param $type
 *   A string defining this type of item (e.g. 'node')
 *
 * @param $text
 *   The content of this item. Must be a piece of HTML text.
 *
 * @ingroup search
 */
function search_index($sid, $type, $text) {
  $minimum_word_size = variable_get('minimum_word_size', 3);

  // Link matching
  global $base_url;
  $node_regexp = '@href=[\'"]?(?:'. preg_quote($base_url, '@') .'/|'. preg_quote(base_path(), '@') .')(?:\?q=)?/?((?![a-z]+:)[^\'">]+)[\'">]@i';

  // Multipliers for scores of words inside certain HTML tags.
  // Note: 'a' must be included for link ranking to work.
  $tags = array('h1' => 25,
                'h2' => 18,
                'h3' => 15,
                'h4' => 12,
                'h5' => 9,
                'h6' => 6,
                'u' => 3,
                'b' => 3,
                'i' => 3,
                'strong' => 3,
                'em' => 3,
                'a' => 10);

  // Strip off all ignored tags to speed up processing, but insert space before/after
  // them to keep word boundaries.
  $text = str_replace(array('<', '>'), array(' <', '> '), $text);
  $text = strip_tags($text, '<'. implode('><', array_keys($tags)) .'>');

  // Split HTML tags from plain text.
  $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting $null as required).

  $tag = FALSE; // Odd/even counter. Tag or no tag.
  $link = FALSE; // State variable for link analyser
  $score = 1; // Starting score per word
  $accum = ' '; // Accumulator for cleaned up data
  $tagstack = array(); // Stack with open tags
  $tagwords = 0; // Counter for consecutive words
  $focus = 1; // Focus state

  $results = array(0 => array()); // Accumulator for words for index

  foreach ($split as $value) {
    if ($tag) {
      // Increase or decrease score per word based on tag
      list($tagname) = explode(' ', $value, 2);
      $tagname = drupal_strtolower($tagname);
      // Closing or opening tag? substr() is used instead of $tagname[0] so
      // that an empty tag name does not read an undefined string offset.
      if (substr($tagname, 0, 1) == '/') {
        $tagname = substr($tagname, 1);
        // If we encounter unexpected tags, reset score to avoid incorrect boosting.
        if (!count($tagstack) || $tagstack[0] != $tagname) {
          $tagstack = array();
          $score = 1;
        }
        else {
          // Remove from tag stack and decrement score
          $closed = array_shift($tagstack);
          $score = max(1, $score - (isset($tags[$closed]) ? $tags[$closed] : 0));
        }
        if ($tagname == 'a') {
          $link = FALSE;
        }
      }
      else {
        // Read the top of the stack without touching an undefined offset when
        // the stack is empty; an empty stack compares as NULL, as before.
        $top = isset($tagstack[0]) ? $tagstack[0] : NULL;
        if ($top == $tagname) {
          // None of the tags we look for make sense when nested identically.
          // If they are, it's probably broken HTML.
          $tagstack = array();
          $score = 1;
        }
        else {
          // Add to open tag stack and increment score
          array_unshift($tagstack, $tagname);
          $score += isset($tags[$tagname]) ? $tags[$tagname] : 0;
        }
        if ($tagname == 'a') {
          // Check if link points to a node on this site
          if (preg_match($node_regexp, $value, $match)) {
            $path = drupal_get_normal_path($match[1]);
            if (preg_match('!(?:node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
              $linknid = $match[1];
              if ($linknid > 0) {
                // Note: ignore links to uncachable nodes to avoid redirect bugs.
                $node = db_fetch_object(db_query('SELECT n.title, n.nid, n.vid, r.format FROM {node} n INNER JOIN {node_revisions} r ON n.vid = r.vid WHERE n.nid = %d', $linknid));
                // Links to nodes that do not exist are not treated as node
                // links: there is no row to read the format or title from.
                if ($node && filter_format_allowcache($node->format)) {
                  $link = TRUE;
                  $linktitle = $node->title;
                }
              }
            }
          }
        }
      }
      // A tag change occurred, reset counter.
      $tagwords = 0;
    }
    else {
      // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
      if ($value != '') {
        if ($link) {
          // Check to see if the node link text is its URL. If so, we use the target node title instead.
          if (preg_match('!^https?://!i', $value)) {
            $value = $linktitle;
          }
        }
        $words = search_index_split($value);
        foreach ($words as $word) {
          // Add word to accumulator
          $accum .= $word .' ';
          $num = is_numeric($word);
          // Check wordlength
          if ($num || drupal_strlen($word) >= $minimum_word_size) {
            // Normalize numbers
            if ($num) {
              $word = (int)ltrim($word, '-0');
            }

            // Words inside a node link are credited to the linked node; all
            // other words are credited to the item being indexed (key 0).
            $target = $link ? $linknid : 0;
            if (!isset($results[$target])) {
              $results[$target] = array();
            }
            // Start each word at zero instead of adding to an unset entry.
            if (!isset($results[$target][$word])) {
              $results[$target][$word] = 0;
            }
            $results[$target][$word] += $score * $focus;

            if (!$link) {
              // Focus is a decaying value in terms of the amount of unique words up to this point.
              // From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words.
              $focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015));
            }
          }
          $tagwords++;
          // Too many words inside a single tag probably mean a tag was accidentally left open.
          if (count($tagstack) && $tagwords >= 15) {
            $tagstack = array();
            $score = 1;
          }
        }
      }
    }
    $tag = !$tag;
  }

  search_wipe($sid, $type, TRUE);

  // Insert cleaned up data into dataset
  db_query("INSERT INTO {search_dataset} (sid, type, data) VALUES (%d, '%s', '%s')", $sid, $type, $accum);

  // Insert results into search index
  foreach ($results[0] as $word => $score) {
    db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %f)", $word, $sid, $type, $score);
    search_dirty($word);
  }
  unset($results[0]);

  // Now insert links to nodes
  foreach ($results as $nid => $words) {
    foreach ($words as $word => $score) {
      db_query("INSERT INTO {search_index} (word, sid, type, fromsid, fromtype, score) VALUES ('%s', %d, '%s', %d, '%s', %f)", $word, $nid, 'node', $sid, $type, $score);
      search_dirty($word);
    }
  }
}

617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
/**
 * Extract a module-specific search option from a search query. e.g. 'type:book'
 *
 * @param $keys
 *   The search query string.
 * @param $option
 *   The option name to look for (the part before the colon). It is matched
 *   literally and case-insensitively.
 * @return
 *   The option's value (possibly an empty string), or NULL when the option
 *   does not occur in the query.
 */
function search_query_extract($keys, $option) {
  // Quote the option name so characters such as '.' are matched literally
  // instead of being interpreted as regular expression syntax.
  $pattern = '/(^| )'. preg_quote($option, '/') .':([^ ]*)( |$)/i';
  if (preg_match($pattern, $keys, $matches)) {
    return $matches[2];
  }
  return NULL;
}

/**
 * Return a query with the given module-specific search option inserted in.
 * e.g. 'type:book'.
 *
 * Any existing occurrence of the option is removed first, so the option never
 * appears twice in the returned query.
 *
 * @param $keys
 *   The search query string.
 * @param $option
 *   The option name (the part before the colon).
 * @param $value
 *   (optional) The new value. When empty, the option is only removed.
 * @return
 *   The updated query string.
 */
function search_query_insert($keys, $option, $value = '') {
  // Compare against NULL rather than testing truthiness: an existing value of
  // '0' or '' is falsy but still has to be removed before inserting.
  if (search_query_extract($keys, $option) !== NULL) {
    $keys = trim(preg_replace('/(^| )'. preg_quote($option, '/') .':[^ ]*/i', '', $keys));
  }
  if ($value != '') {
    $keys .= ' '. $option .':'. $value;
  }
  return $keys;
}

/**
 * Parse a search query into SQL conditions.
 *
 * We build a query that matches the dataset bodies.
 *
 * The query text is split into tokens separated by spaces. A token is either a
 * double-quoted phrase or a single word, and may be prefixed with '-' to
 * negate it. The literal token 'OR' between two positive tokens groups them so
 * that only one of them needs to match.
 *
 * @param $text
 *   The search query text.
 *
 * @return
 *   NULL if $text contains no tokens. Otherwise an array with the elements:
 *   - 0: The SQL condition snippets returned by _search_parse_query() for all
 *        keywords, joined with ' AND ' (OR groups are parenthesised).
 *   - 1: The arguments belonging to element 0.
 *   - 2: An "i.word = '%s' OR ..." condition with one placeholder per entry of
 *        element 3, or an empty string if there are none.
 *   - 3: The words collected by _search_parse_query() for the positive
 *        keywords.
 *   - 4: The number of words counted for the positive keywords; an OR group
 *        counts once at most.
 */
function search_parse_query($text) {
  $keys = array('positive' => array(), 'negative' => array());

  // Tokenize query string
  preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' '. $text, $matches, PREG_SET_ORDER);

  if (count($matches) < 1) {
    return NULL;
  }

  // Classify tokens
  $or = FALSE;
  foreach ($matches as $match) {
    $phrase = FALSE;
    // Strip off phrase quotes. The token is never empty (the pattern requires
    // at least one character), so reading offset 0 is safe. Square brackets
    // are used because curly-brace string offsets no longer parse on PHP 8.
    if ($match[2][0] == '"') {
      $match[2] = substr($match[2], 1, -1);
      $phrase = TRUE;
    }
    // Simplify keyword according to indexing rules and external preprocessors
    $words = search_simplify($match[2]);
    // Re-explode in case simplification added more words, except when matching a phrase
    $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);
    // Negative matches
    if ($match[1] == '-') {
      $keys['negative'] = array_merge($keys['negative'], $words);
    }
    // OR operator: instead of a single keyword, we store an array of all
    // OR'd keywords.
    elseif ($match[2] == 'OR' && count($keys['positive'])) {
      $last = array_pop($keys['positive']);
      // Starting a new OR?
      if (!is_array($last)) {
        $last = array($last);
      }
      $keys['positive'][] = $last;
      $or = TRUE;
      // Skip the reset of $or below so the next keyword joins this group.
      continue;
    }
    // Plain keyword
    else {
      if ($or) {
        // Add to last element (which is an array)
        $keys['positive'][count($keys['positive']) - 1] = array_merge($keys['positive'][count($keys['positive']) - 1], $words);
      }
      else {
        $keys['positive'] = array_merge($keys['positive'], $words);
      }
    }
    $or = FALSE;
  }

  // Convert keywords into SQL statements.
  $query = array();
  $arguments = array();
  $arguments2 = array();
  $matches = 0;
  // Positive matches
  foreach ($keys['positive'] as $key) {
    // Group of ORed terms
    if (is_array($key) && count($key)) {
      $queryor = array();
      $any = FALSE;
      foreach ($key as $or) {
        list($q, $count) = _search_parse_query($or, $arguments2);
        $any |= $count;
        if ($q) {
          $queryor[] = $q;
          $arguments[] = $or;
        }
      }
      if (count($queryor)) {
        $query[] = '('. implode(' OR ', $queryor) .')';
        // A group of OR keywords only needs to match once
        $matches += ($any > 0);
      }
    }
    // Single ANDed term
    else {
      list($q, $count) = _search_parse_query($key, $arguments2);
      if ($q) {
        $query[] = $q;
        $arguments[] = $key;
        // Each AND keyword needs to match at least once
        $matches += $count;
      }
    }
  }
  // Negative matches
  foreach ($keys['negative'] as $key) {
    list($q) = _search_parse_query($key, $arguments2, TRUE);
    if ($q) {
      $query[] = $q;
      $arguments[] = $key;
    }
  }
  $query = implode(' AND ', $query);

  // Build word-index conditions for the first pass
  $query2 = substr(str_repeat("i.word = '%s' OR ", count($arguments2)), 0, -4);

  return array($query, $arguments, $query2, $arguments2, $matches);
}

/**
 * Helper function for search_parse_query();
 *
 * Produces the SQL snippet used to match one word or phrase against the
 * dataset and, for positive matches, registers the words of the phrase that
 * qualify for the index lookup.
 *
 * @param $word
 *   The word or phrase (words separated by single spaces) to process.
 * @param $scores
 *   Array of index words collected so far, keyed by the word itself. Words
 *   that are not yet present are added to it.
 * @param $not
 *   (optional) TRUE for a negative match; no words are collected in that case.
 *
 * @return
 *   An array holding the "d.data [NOT] LIKE" snippet and the number of words
 *   that were newly added to $scores.
 */
function _search_parse_query(&$word, &$scores, $not = FALSE) {
  $added = 0;

  // Only positive matches contribute words to the index lookup.
  if (!$not) {
    foreach (explode(' ', $word) as $part) {
      if (is_numeric($part)) {
        // Numbers always qualify; they are reduced to a plain integer with
        // any leading minus signs and zeros stripped.
        $part = (int)ltrim($part, '-0');
      }
      elseif (drupal_strlen($part) < variable_get('minimum_word_size', 3)) {
        // Non-numeric words below the minimum word size are skipped.
        continue;
      }

      if (!isset($scores[$part])) {
        $scores[$part] = $part;
        $added++;
      }
    }
  }

  // Hand back the matching snippet together with the number of added words.
  $operator = $not ? 'NOT LIKE' : 'LIKE';
  return array("d.data $operator '%% %s %%'", $added);
}

Kjartan Mannes's avatar
Kjartan Mannes committed
773
/**
 * Do a query on the full-text search index for a word or words.
 *
 * This function is normally only called by each module that supports the
 * indexed search (and thus, implements hook_update_index()).
 *
 * Two queries are performed which can be extended by the caller.
 *
 * The first query selects a set of possible matches based on the search index
 * and any extra given restrictions. This is the classic "OR" search.
 *
 * SELECT i.type, i.sid, SUM(i.score * t.count) AS relevance, COUNT(*) AS matches
 * FROM {search_index} i
 * INNER JOIN {search_total} t ON i.word = t.word
 * $join1
 * WHERE $where1 AND (...) AND i.type = '%s'
 * GROUP BY i.type, i.sid
 * HAVING COUNT(*) >= %d
 *
 * The second query further refines this set by verifying advanced text
 * conditions (such as AND, negative or phrase matches), and orders the results
 * on the column or expression 'score':
 *
 * SELECT i.type, i.sid, $select2
 * FROM temp_search_sids i
 * INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type
 * $join2
 * WHERE (...)
 * ORDER BY score DESC
 *
 * @param $keywords
 *   A search string as entered by the user.
 *
 * @param $type
 *   A string identifying the calling module.
 *
 * @param $join1
 *   (optional) Inserted into the JOIN part of the first SQL query.
 *   For example "INNER JOIN {node} n ON n.nid = i.sid".
 *
 * @param $where1
 *   (optional) Inserted into the WHERE part of the first SQL query.
 *   For example "(n.status > %d)".
 *
 * @param $arguments1
 *   (optional) Extra SQL arguments belonging to the first query.
 *
 * @param $select2
 *   (optional) Inserted into the SELECT part of the second query. Must contain
 *   a column selected as 'score'. Every occurrence of 'i.relevance' in it is
 *   replaced by the relevance normalized to the highest relevance found.
 *   Defaults to 'i.relevance AS score'.
 *
 * @param $join2
 *   (optional) Inserted into the JOIN part of the second SQL query.
 *   For example "INNER JOIN {node_comment_statistics} n ON n.nid = i.sid"
 *
 * @param $arguments2
 *   (optional) Extra SQL arguments belonging to the second query parameter.
 *
 * @param $sort_parameters
 *   (optional) SQL arguments for sorting the final results.
 *              Default: 'ORDER BY score DESC'
 *
 * @return
 *   An array of result objects, one per row fetched from the paged second
 *   query (each carrying the selected type, sid and $select2 columns), or an
 *   empty array if the keywords are unusable or nothing matched.
 *
 * @ingroup search
 */
function do_search($keywords, $type, $join1 = '', $where1 = '1', $arguments1 = array(), $select2 = 'i.relevance AS score', $join2 = '', $arguments2 = array(), $sort_parameters = 'ORDER BY score DESC') {
  $query = search_parse_query($keywords);

  // search_parse_query() returns NULL when the keywords contain no tokens, so
  // test for NULL before reading any array offset of the result.
  if ($query === NULL || $query[2] == '') {
    form_set_error('keys', t('You must include at least one positive keyword with @count characters or more.', array('@count' => variable_get('minimum_word_size', 3))));
  }
  if ($query === NULL || $query[0] == '' || $query[2] == '') {
    return array();
  }

  // First pass: select all possible matching sids, doing a simple index-based OR matching on the keywords.
  // 'matches' is used to reject those items that cannot possibly match the query.
  $conditions = $where1 .' AND ('. $query[2] .") AND i.type = '%s'";
  // The argument order follows the placeholder order: caller's arguments, one
  // word per "i.word = '%s'", the type, and finally the HAVING threshold.
  $arguments = array_merge($arguments1, $query[3], array($type, $query[4]));
  db_query_temporary("SELECT i.type, i.sid, SUM(i.score * t.count) AS relevance, COUNT(*) AS matches FROM {search_index} i INNER JOIN {search_total} t ON i.word = t.word $join1 WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d", $arguments, 'temp_search_sids');

  // Calculate maximum relevance, to normalize it
  $normalize = db_result(db_query('SELECT MAX(relevance) FROM temp_search_sids'));
  if (!$normalize) {
    return array();
  }
  $select2 = str_replace('i.relevance', '('. (1.0 / $normalize) .' * i.relevance)', $select2);

  // Second pass: only keep items that match the complicated keywords conditions (phrase search, negative keywords, ...)
  $conditions = '('. $query[0] .')';
  $arguments = array_merge($arguments2, $query[1]);
  db_query_temporary("SELECT i.type, i.sid, $select2 FROM temp_search_sids i INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type $join2 WHERE $conditions $sort_parameters", $arguments, 'temp_search_results');
  if (($count = db_result(db_query('SELECT COUNT(*) FROM temp_search_results'))) == 0) {
    return array();
  }
  // The total is already known, so hand the pager a constant count query.
  $count_query = "SELECT $count";

  // Do actual search query
  $result = pager_query("SELECT * FROM temp_search_results", 10, 0, $count_query);
  $results = array();
  while ($item = db_fetch_object($result)) {
    $results[] = $item;
  }
  return $results;
}

881
882
883
884
885
886
887
888
889
890
/**
 * Helper function for grabbing search keys.
 *
 * @return
 *   The search keys taken from the third and following components of the
 *   requested path ($_GET['q']); if the path has fewer than three components,
 *   the 'keys' request parameter, or an empty string when that is absent too.
 */
function search_get_keys() {
  // Extract keys as remainder of path
  // Note: support old GET format of searches for existing links.
  $path = explode('/', isset($_GET['q']) ? $_GET['q'] : '', 3);
  if (count($path) == 3) {
    return $path[2];
  }
  // Guard the lookup: a search page requested without any keys has no 'keys'
  // parameter, and reading it unguarded raises an undefined index notice.
  return isset($_REQUEST['keys']) ? $_REQUEST['keys'] : '';
}

Dries Buytaert's avatar
   
Dries Buytaert committed
891
892
893
/**
 * Menu callback; presents the search form and/or search results.
 *
 * @param $type
 *   (optional) The search type, i.e. the name of the module whose 'search'
 *   hook is invoked. When empty on a non-POST request, the request is
 *   redirected to 'search/node'.
 *
 * @return
 *   The rendered search form, followed by a themed box with the search results
 *   (or the "no results" help text) when search keys were given.
 */
function search_view($type = '') {
  // Search form submits with POST but redirects to GET. This way we can keep
  // the search query URL clean as a whistle:
  // search/type/keyword+keyword
  if (!isset($_POST['form_id'])) {
    if ($type == '') {
      // Note: search/node can not be a default tab because it would take on the
      // path of its parent (search). It would prevent remembering keywords when
      // switching tabs. This is why we drupal_goto to it from the parent instead.
      drupal_goto('search/node');
    }

    $keys = search_get_keys();
    // Start with empty results so the output below is well-defined when no
    // search is performed.
    $results = '';
    // Only perform search if there is non-whitespace search term:
    if (trim($keys)) {
      // Log the search keys:
      watchdog('search', t('%keys (@type).', array('%keys' => $keys, '@type' => module_invoke($type, 'search', 'name'))), WATCHDOG_NOTICE, l(t('results'), 'search/'. $type .'/'. $keys));

      // Collect the search results:
      $results = search_data($keys, $type);

      if ($results) {
        $results = theme('box', t('Search results'), $results);
      }
      else {
        $results = theme('box', t('Your search yielded no results'), search_help('search#noresults'));
      }
    }

    // Construct the search form.
    $output = drupal_get_form('search_form', NULL, $keys, $type);
    $output .= $results;

    return $output;
  }

  // Form submission: no keys have been read from the URL on this path, so pass
  // an explicit empty string instead of an undefined variable.
  return drupal_get_form('search_form', NULL, '', $type);
}

933
934
935
936
937
938
939
940
/**
 * @defgroup search Search interface
 * @{
 * The Drupal search interface manages a global search mechanism.
 *
 * Modules may plug into this system to provide searches of different types of
 * data. Most of the system is handled by search.module, so this must be enabled
 * for all of the search features to work.
 *
 * There are three ways to interact with the search system:
 * - Specifically for searching nodes, you can implement nodeapi('update index')
 *   and nodeapi('search result'). However, note that the search system already
 *   indexes all visible output of a node, i.e. everything displayed normally
 *   by hook_view() and hook_nodeapi('view'). This is usually sufficient.
 *   You should only use this mechanism if you want additional, non-visible data
 *   to be indexed.
 * - Implement hook_search(). This will create a search tab for your module on
 *   the /search page with a simple keyword search form. You may optionally
 *   implement hook_search_item() to customize the display of your results.
 * - Implement hook_update_index(). This allows your module to use Drupal's
 *   HTML indexing mechanism for searching full text efficiently.
 *
 * If your module needs to provide a more complicated search form, then you need
 * to implement it yourself without hook_search(). In that case, you should
 * define it as a local task (tab) under the /search page (e.g. /search/mymodule)
 * so that users can easily find it.
 */

/**
 * Render a search form.
 *
 * @param $action
 *   Form action. Defaults to "search".
 * @param $keys
 *   The search string entered by the user, containing keywords for the search.
968
969
970
 * @param $type
 *   The type of search to render the node for. Must be the name of module
 *   which implements hook_search(). Defaults to 'node'.
971
972
 * @param $prompt
 *   A piece of text to put before the form (e.g. "Enter your keywords")
973
974
975
 * @return
 *   An HTML string containing the search form.
 */
976
function search_form($action = '', $keys = '', $type = NULL, $prompt = NULL) {
977
  if (!$action) {
978
    $action = url('search/'. $type);
979
  }
980
981
982
  if (is_null($prompt)) {
    $prompt = t('Enter your keywords');
  }
983

984
985
986
987
988
  $form = array(
    '#action' => $action,
    '#attributes' => array('class' => 'search-form'),
  );
  $form['module'] = array('#type' => 'value', '#value' => $type);
989
  $form['basic'] = array('#type' => 'item', '#title' => $prompt);
990
991
992
993
994