search.module 44 KB
Newer Older
Dries's avatar
   
Dries committed
1
<?php
2
// $Id$
Dries's avatar
   
Dries committed
3

Dries's avatar
   
Dries committed
4
5
6
7
8
/**
 * @file
 * Enables site-wide keyword searching.
 */

9
/**
 * Matches Unicode character classes to exclude from the search index.
 *
 * See: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
 *
 * The index only contains the following character classes:
 * Lu     Letter, Uppercase
 * Ll     Letter, Lowercase
 * Lt     Letter, Titlecase
 * Lo     Letter, Other
 * Nd     Number, Decimal Digit
 * No     Number, Other
 *
 * The value is the body of a PCRE character class (no enclosing brackets);
 * search_simplify() wraps it in [...] and applies the /u modifier.
 *
 * The range that ends at U+F8FF starts at U+E000 rather than U+D800: the
 * surrogate code points U+D800-U+DFFF cannot occur in valid UTF-8 and are
 * rejected by newer PCRE versions when they appear in a UTF-8 pattern.
 */
define('PREG_CLASS_SEARCH_EXCLUDE',
'\x{0}-\x{2f}\x{3a}-\x{40}\x{5b}-\x{60}\x{7b}-\x{bf}\x{d7}\x{f7}\x{2b0}-'.
'\x{385}\x{387}\x{3f6}\x{482}-\x{489}\x{559}-\x{55f}\x{589}-\x{5c7}\x{5f3}-'.
'\x{61f}\x{640}\x{64b}-\x{65e}\x{66a}-\x{66d}\x{670}\x{6d4}\x{6d6}-\x{6ed}'.
'\x{6fd}\x{6fe}\x{700}-\x{70f}\x{711}\x{730}-\x{74a}\x{7a6}-\x{7b0}\x{901}-'.
'\x{903}\x{93c}\x{93e}-\x{94d}\x{951}-\x{954}\x{962}-\x{965}\x{970}\x{981}-'.
'\x{983}\x{9bc}\x{9be}-\x{9cd}\x{9d7}\x{9e2}\x{9e3}\x{9f2}-\x{a03}\x{a3c}-'.
'\x{a4d}\x{a70}\x{a71}\x{a81}-\x{a83}\x{abc}\x{abe}-\x{acd}\x{ae2}\x{ae3}'.
'\x{af1}-\x{b03}\x{b3c}\x{b3e}-\x{b57}\x{b70}\x{b82}\x{bbe}-\x{bd7}\x{bf0}-'.
'\x{c03}\x{c3e}-\x{c56}\x{c82}\x{c83}\x{cbc}\x{cbe}-\x{cd6}\x{d02}\x{d03}'.
'\x{d3e}-\x{d57}\x{d82}\x{d83}\x{dca}-\x{df4}\x{e31}\x{e34}-\x{e3f}\x{e46}-'.
'\x{e4f}\x{e5a}\x{e5b}\x{eb1}\x{eb4}-\x{ebc}\x{ec6}-\x{ecd}\x{f01}-\x{f1f}'.
'\x{f2a}-\x{f3f}\x{f71}-\x{f87}\x{f90}-\x{fd1}\x{102c}-\x{1039}\x{104a}-'.
'\x{104f}\x{1056}-\x{1059}\x{10fb}\x{10fc}\x{135f}-\x{137c}\x{1390}-\x{1399}'.
'\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16f0}\x{1712}-\x{1714}'.
'\x{1732}-\x{1736}\x{1752}\x{1753}\x{1772}\x{1773}\x{17b4}-\x{17db}\x{17dd}'.
'\x{17f0}-\x{180e}\x{1843}\x{18a9}\x{1920}-\x{1945}\x{19b0}-\x{19c0}\x{19c8}'.
'\x{19c9}\x{19de}-\x{19ff}\x{1a17}-\x{1a1f}\x{1d2c}-\x{1d61}\x{1d78}\x{1d9b}-'.
'\x{1dc3}\x{1fbd}\x{1fbf}-\x{1fc1}\x{1fcd}-\x{1fcf}\x{1fdd}-\x{1fdf}\x{1fed}-'.
'\x{1fef}\x{1ffd}-\x{2070}\x{2074}-\x{207e}\x{2080}-\x{2101}\x{2103}-\x{2106}'.
'\x{2108}\x{2109}\x{2114}\x{2116}-\x{2118}\x{211e}-\x{2123}\x{2125}\x{2127}'.
'\x{2129}\x{212e}\x{2132}\x{213a}\x{213b}\x{2140}-\x{2144}\x{214a}-\x{2b13}'.
'\x{2ce5}-\x{2cff}\x{2d6f}\x{2e00}-\x{3005}\x{3007}-\x{303b}\x{303d}-\x{303f}'.
'\x{3099}-\x{309e}\x{30a0}\x{30fb}-\x{30fe}\x{3190}-\x{319f}\x{31c0}-\x{31cf}'.
'\x{3200}-\x{33ff}\x{4dc0}-\x{4dff}\x{a015}\x{a490}-\x{a716}\x{a802}\x{a806}'.
'\x{a80b}\x{a823}-\x{a82b}\x{e000}-\x{f8ff}\x{fb1e}\x{fb29}\x{fd3e}\x{fd3f}'.
'\x{fdfc}-\x{fe6b}\x{feff}-\x{ff0f}\x{ff1a}-\x{ff20}\x{ff3b}-\x{ff40}\x{ff5b}-'.
'\x{ff65}\x{ff70}\x{ff9e}\x{ff9f}\x{ffe0}-\x{fffd}');
50
51

/**
Steven Wittens's avatar
Steven Wittens committed
52
 * Matches all 'N' Unicode character classes (numbers)
53
 */
54
55
56
57
58
59
60
61
62
63
// Body of a PCRE character class (no enclosing brackets). search_simplify()
// wraps it in [...] and uses it with the /u modifier to find runs of digits.
define('PREG_CLASS_NUMBERS',
'\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}'.
'\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}'.
'\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}'.
'\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-'.
'\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}'.
'\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}'.
'\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}'.
'\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-'.
'\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}');
64
65

/**
Steven Wittens's avatar
Steven Wittens committed
66
 * Matches all 'P' Unicode character classes (punctuation)
67
 */
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
// Body of a PCRE character class (no enclosing brackets). search_simplify()
// wraps it in [...] and uses it with the /u modifier to drop punctuation that
// sits between two numeric characters.
define('PREG_CLASS_PUNCTUATION',
'\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}'.
'\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}'.
'\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}'.
'\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}'.
'\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}'.
'\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}'.
'\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}'.
'\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}'.
'\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-'.
'\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}'.
'\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}'.
'\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}'.
'\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}'.
'\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-'.
'\x{ff65}');

/**
 * Matches all CJK characters that are candidates for auto-splitting
 * (Chinese, Japanese, Korean).
 * Contains kana and BMP ideographs.
 *
 * The value is the body of a PCRE character class (no enclosing brackets).
 * search_simplify() wraps it in [...]+ with the /u modifier and hands every
 * matched run to search_expand_cjk() when the 'overlap_cjk' variable is on.
 */
define('PREG_CLASS_CJK', '\x{3041}-\x{30ff}\x{31f0}-\x{31ff}\x{3400}-\x{4db5}'.
'\x{4e00}-\x{9fbb}\x{f900}-\x{fad9}');
92

Dries's avatar
   
Dries committed
93
94
95
/**
 * Implementation of hook_help().
 *
 * Returns help text for the module help page, the search settings page and
 * the "no results" message. Any other path yields no return value.
 *
 * @param $path
 *   The path help is requested for.
 * @param $arg
 *   Not used by this implementation.
 */
function search_help($path, $arg) {
  if ($path == 'admin/help#search') {
    // Each entry becomes its own <p> element, in this order.
    $paragraphs = array(
      t('The search module adds the ability to search for content by keywords. Search is often the only practical way to find content on a large site, and is useful for finding both users and posts.'),
      t('To provide keyword searching, the search engine maintains an index of words found in your site\'s content. To build and maintain this index, a correctly configured <a href="@cron">cron maintenance task</a> is required. Indexing behavior can be adjusted using the <a href="@searchsettings">search settings page</a>; for example, the <em>Number of items to index per cron run</em> sets the maximum number of items indexed in each pass of a <a href="@cron">cron maintenance task</a>. If necessary, reduce this number to prevent timeouts and memory errors when indexing.', array('@cron' => url('admin/reports/status'), '@searchsettings' => url('admin/settings/search'))),
      t('For more information, see the online handbook entry for <a href="@search">Search module</a>.', array('@search' => 'http://drupal.org/handbook/modules/search/')),
    );
    return '<p>'. implode('</p><p>', $paragraphs) .'</p>';
  }

  if ($path == 'admin/settings/search') {
    $intro = t('The search engine maintains an index of words found in your site\'s content. To build and maintain this index, a correctly configured <a href="@cron">cron maintenance task</a> is required. Indexing behavior can be adjusted using the settings below.', array('@cron' => url('admin/reports/status')));
    return '<p>'. $intro .'</p>';
  }

  if ($path == 'search#noresults') {
    return t('<ul>
<li>Check if your spelling is correct.</li>
<li>Remove quotes around phrases to match each word individually: <em>"blue smurf"</em> will match less than <em>blue smurf</em>.</li>
<li>Consider loosening your query with <em>OR</em>: <em>blue smurf</em> will match less than <em>blue OR smurf</em>.</li>
</ul>');
  }
}
Kjartan's avatar
Kjartan committed
113

114
115
116
117
118
119
120
/**
 * Implementation of hook_theme().
 *
 * Registers the templates for the two search forms and for single and
 * grouped search results. The result templates are declared in
 * search.pages.inc.
 */
function search_theme() {
  $form_arguments = array('form' => NULL);
  $pages_file = 'search.pages.inc';

  $hooks = array();
  $hooks['search_theme_form'] = array(
    'arguments' => $form_arguments,
    'template' => 'search-theme-form',
  );
  $hooks['search_block_form'] = array(
    'arguments' => $form_arguments,
    'template' => 'search-block-form',
  );
  $hooks['search_result'] = array(
    'arguments' => array('result' => NULL, 'type' => NULL),
    'file' => $pages_file,
    'template' => 'search-result',
  );
  $hooks['search_results'] = array(
    'arguments' => array('results' => NULL, 'type' => NULL),
    'file' => $pages_file,
    'template' => 'search-results',
  );

  return $hooks;
}

Kjartan's avatar
Kjartan committed
140
/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names defined by this module.
 */
function search_perm() {
  $permissions = array();
  $permissions[] = 'search content';
  $permissions[] = 'use advanced search';
  $permissions[] = 'administer search';
  return $permissions;
}

147
148
149
150
151
152
/**
 * Implementation of hook_block().
 *
 * Declares a single "Search form" block and renders it for users who hold
 * the 'search content' permission.
 *
 * @param $op
 *   'list' returns the block definitions; 'view' returns the block itself.
 *   Any other value yields no return value.
 * @param $delta
 *   Not used by this implementation.
 */
function search_block($op = 'list', $delta = 0) {
  if ($op == 'list') {
    return array(
      0 => array(
        'info' => t('Search form'),
        // Not worth caching.
        'cache' => BLOCK_NO_CACHE,
      ),
    );
  }

  // Nothing to show for other operations or for users without access.
  if ($op != 'view' || !user_access('search content')) {
    return;
  }

  return array(
    'content' => drupal_get_form('search_block_form'),
    'subject' => t('Search'),
  );
}

Dries's avatar
   
Dries committed
164
165
166
/**
 * Implementation of hook_menu().
 *
 * Registers the search page, the settings and "clear index" forms, the
 * "top search phrases" report, and one local task below 'search' for every
 * module that implements hook_search().
 */
function search_menu() {
  $admin_access = array('administer search');
  $admin_file = 'search.admin.inc';
  $pages_file = 'search.pages.inc';

  $items = array(
    'search' => array(
      'title' => 'Search',
      'page callback' => 'search_view',
      'access arguments' => array('search content'),
      'type' => MENU_SUGGESTED_ITEM,
      'file' => $pages_file,
    ),
    'admin/settings/search' => array(
      'title' => 'Search settings',
      'description' => 'Configure relevance settings for search and other indexing options',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('search_admin_settings'),
      'access arguments' => $admin_access,
      'type' => MENU_NORMAL_ITEM,
      'file' => $admin_file,
    ),
    'admin/settings/search/wipe' => array(
      'title' => 'Clear index',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('search_wipe_confirm'),
      'access arguments' => $admin_access,
      'type' => MENU_CALLBACK,
      'file' => $admin_file,
    ),
    // The report page is served by a callback that lives in the dblog module.
    'admin/reports/search' => array(
      'title' => 'Top search phrases',
      'description' => 'View most popular search phrases.',
      'page callback' => 'dblog_top',
      'page arguments' => array('search'),
      'file' => 'dblog.admin.inc',
      'file path' => drupal_get_path('module', 'dblog'),
    ),
  );

  // One tab per search-providing module; the tab title is resolved at
  // display time through module_invoke($module, 'search', 'name', TRUE).
  $search_modules = module_implements('search');
  foreach ($search_modules as $module) {
    $tab_path = 'search/'. $module .'/%menu_tail';
    $items[$tab_path] = array(
      'title callback' => 'module_invoke',
      'title arguments' => array($module, 'search', 'name', TRUE),
      'page callback' => 'search_view',
      'page arguments' => array($module),
      'access callback' => '_search_menu',
      'access arguments' => array($module),
      'type' => MENU_LOCAL_TASK,
      'parent' => 'search',
      'file' => $pages_file,
    );
  }

  return $items;
}

217
218
/**
 * Menu access callback for the per-module search tabs.
 *
 * @param $name
 *   Name of the module whose search tab is being checked.
 * @return
 *   TRUE when the user may search content and the module's
 *   hook_search('name') returns a non-empty value, FALSE otherwise.
 */
function _search_menu($name) {
  if (!user_access('search content')) {
    return FALSE;
  }
  return (bool) module_invoke($name, 'search', 'name');
}

Dries's avatar
Dries committed
221
/**
 * Wipes a part of or the entire search index.
 *
 * Without arguments, every module is asked to reset its index through
 * hook_search('reset'). Otherwise the dataset and index rows of one item
 * are deleted.
 *
 * @param $sid
 *   (optional) The SID of the item to wipe. If specified, $type must be
 *   passed too.
 * @param $type
 *   (optional) The type of item to wipe.
 * @param $reindex
 *   (optional) When TRUE, the item's rows in {search_node_links} are kept;
 *   search_index() passes TRUE when it re-indexes an item.
 */
function search_wipe($sid = NULL, $type = NULL, $reindex = FALSE) {
  $wipe_single_item = ($type != NULL || $sid != NULL);

  if (!$wipe_single_item) {
    module_invoke_all('search', 'reset');
    return;
  }

  db_query("DELETE FROM {search_dataset} WHERE sid = %d AND type = '%s'", $sid, $type);
  db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'", $sid, $type);
  // Don't remove links if re-indexing.
  if (!$reindex) {
    db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type);
  }
}

244
245
246
247
248
/**
 * Marks a word as dirty, or retrieves the list of dirty words.
 *
 * Used during indexing (cron). Dirty words have outdated total counts in the
 * search_total table and need to be recounted.
 *
 * @param $word
 *   (optional) The word to mark. Omit it to read the list instead.
 * @return
 *   When called without a word: an array whose keys are the dirty words.
 *   When called with a word: nothing.
 */
function search_dirty($word = NULL) {
  static $dirty = array();

  // Read mode.
  if ($word === NULL) {
    return $dirty;
  }

  // Write mode: the word is the key, so duplicates collapse.
  $dirty[$word] = TRUE;
}

Kjartan's avatar
Kjartan committed
259
/**
 * Implementation of hook_cron().
 *
 * Fires hook_update_index() in all modules. Dirty words (see search_dirty())
 * are cleaned up by search_update_totals(), which runs at shutdown.
 */
function search_cron() {
  // Registered before indexing starts so that search_total is brought up to
  // date even when the indexing below does not finish.
  register_shutdown_function('search_update_totals');

  // Let every module update its part of the word index.
  $modules = module_list();
  foreach ($modules as $module_name) {
    module_invoke($module_name, 'update_index');
  }
}

/**
 * Shutdown callback that brings the search_total table up to date.
 *
 * Registered by search_cron() so that totals are refreshed even if cron
 * times out or otherwise fails.
 */
function search_update_totals() {
  // Recompute the IDF (Inverse Document Frequency) weight of every word that
  // was flagged through search_dirty().
  foreach (array_keys(search_dirty()) as $dirty_word) {
    $score_sum = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $dirty_word));
    // Apply Zipf's law to equalize the probability distribution.
    $weight = log10(1 + 1 / max(1, $score_sum));

    // Update the existing row; insert one when the update touched nothing.
    db_query("UPDATE {search_total} SET count = %f WHERE word = '%s'", $weight, $dirty_word);
    if (!db_affected_rows()) {
      db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %f)", $dirty_word, $weight);
    }
  }

  // Remove totals for words that no longer occur in search_index. The LEFT
  // JOIN keeps only the search_total rows that have no matching index row.
  $orphans = db_query("SELECT t.word AS realword, i.word FROM {search_total} t LEFT JOIN {search_index} i ON t.word = i.word WHERE i.word IS NULL");
  while ($orphan = db_fetch_object($orphans)) {
    db_query("DELETE FROM {search_total} WHERE word = '%s'", $orphan->realword);
  }
}

/**
 * Simplifies a string according to indexing rules.
 *
 * @param $text
 *   The text to simplify.
 * @return
 *   The lowercased text in which every excluded character run has been
 *   turned into a single space.
 */
function search_simplify($text) {
  // Turn entities into UTF-8 characters, then lowercase everything.
  $text = drupal_strtolower(decode_entities($text));

  // Give other modules a chance to rewrite the text (taken by reference).
  search_invoke_preprocess($text);

  // Simple CJK handling: expand each run of CJK characters into overlapping
  // tokens.
  if (variable_get('overlap_cjk', TRUE)) {
    $cjk_run = '/['. PREG_CLASS_CJK .']+/u';
    $text = preg_replace_callback($cjk_run, 'search_expand_cjk', $text);
  }

  // Numerical data such as dates, IP addresses or version numbers is easier
  // to find when a group of numerical characters separated only by
  // punctuation counts as one piece. A search for '20/03/1984' then also
  // finds '20-03-1984'.
  // Readable regexp: ([number]+)[punctuation]+(?=[number])
  $number = '['. PREG_CLASS_NUMBERS .']';
  $punctuation = '['. PREG_CLASS_PUNCTUATION .']';
  $text = preg_replace('/('. $number .'+)'. $punctuation .'+(?='. $number .')/u', '\1', $text);

  // Dots, underscores and dashes are dropped outright, which gives
  // meaningful results for acronyms and URLs.
  $text = preg_replace('/[._-]+/', '', $text);

  // Everything else that is excluded from the index (punctuation, marks,
  // spacers, etc.) acts as a word boundary.
  $boundary = '/['. PREG_CLASS_SEARCH_EXCLUDE .']+/u';
  return preg_replace($boundary, ' ', $text);
}

/**
 * Basic CJK tokenizer, used as a preg_replace_callback() callback.
 *
 * Splits a string into consecutive, overlapping sequences of characters,
 * each 'minimum_word_size' characters long.
 *
 * @param $matches
 *   The match array; only element 0 (the whole matched run) is used.
 * @return
 *   The tokens separated by single spaces, with a leading and trailing space.
 */
function search_expand_cjk($matches) {
  $window = variable_get('minimum_word_size', 3);
  $remaining = $matches[0];
  $length = drupal_strlen($remaining);

  // Runs no longer than the window are passed through as one token.
  if ($length <= $window) {
    return ' '. $remaining .' ';
  }

  $output = ' ';
  // Sliding window (FIFO) over the most recent characters.
  $queue = array();
  $position = 0;
  while ($position < $length) {
    // Take the first character and cut its bytes off the front of the string.
    $char = drupal_substr($remaining, 0, 1);
    $remaining = substr($remaining, strlen($char));
    $queue[] = $char;

    // Once the window is full, emit it and slide forward by one character.
    if ($position >= $window - 1) {
      $output .= implode('', $queue) .' ';
      array_shift($queue);
    }
    $position++;
  }
  return $output;
}

/**
 * Splits a string into tokens for indexing.
 *
 * The result of the most recent call is cached, so asking for the same text
 * twice in a row only simplifies it once.
 *
 * @param $text
 *   The text to split.
 * @return
 *   An array of simplified words, each passed through
 *   _search_index_truncate().
 */
function search_index_split($text) {
  static $last = NULL;
  static $lastsplit = NULL;

  // Compare against the raw input of the previous call, and do so strictly:
  // a loose comparison treats '' as equal to the initial NULL and treats
  // numeric-looking strings such as '1e3' and '1000' as equal.
  if ($last !== NULL && $last === $text) {
    return $lastsplit;
  }

  // Process words
  $words = explode(' ', search_simplify($text));
  array_walk($words, '_search_index_truncate');

  // Remember the unmodified input together with its result.
  $last = $text;
  $lastsplit = $words;

  return $words;
}

389
/**
 * array_walk() callback for search_index_split().
 *
 * @param $text
 *   A single word, replaced in place by the result of
 *   truncate_utf8($text, 50).
 */
function _search_index_truncate(&$text) {
  $shortened = truncate_utf8($text, 50);
  $text = $shortened;
}

396
397
398
/**
 * Invokes hook_search_preprocess() in modules.
 *
 * @param $text
 *   The text to process. It is replaced in place by the return value of each
 *   implementing module in turn.
 */
function search_invoke_preprocess(&$text) {
  $processors = module_implements('search_preprocess');
  foreach ($processors as $processor) {
    $text = module_invoke($processor, 'search_preprocess', $text);
  }
}

/**
 * Update the full-text search index for a particular item.
 *
 * The text is split into HTML tags and plain text. Words score higher inside
 * the tags listed in $tags, and captions of links that point to local nodes
 * are additionally recorded in {search_node_links} for the target node.
 *
 * @param $sid
 *   A number identifying this particular item (e.g. node id).
 *
 * @param $type
 *   A string defining this type of item (e.g. 'node')
 *
 * @param $text
 *   The content of this item. Must be a piece of HTML text.
 *
 * @ingroup search
 */
function search_index($sid, $type, $text) {
  $minimum_word_size = variable_get('minimum_word_size', 3);

  // Link matching
  global $base_url;
  $node_regexp = '@href=[\'"]?(?:'. preg_quote($base_url, '@') .'/|'. preg_quote(base_path(), '@') .')(?:\?q=)?/?((?![a-z]+:)[^\'">]+)[\'">]@i';

  // Multipliers for scores of words inside certain HTML tags.
  // Note: 'a' must be included for link ranking to work.
  $tags = array('h1' => 25,
                'h2' => 18,
                'h3' => 15,
                'h4' => 12,
                'h5' => 9,
                'h6' => 6,
                'u' => 3,
                'b' => 3,
                'i' => 3,
                'strong' => 3,
                'em' => 3,
                'a' => 10);

  // Strip off all ignored tags to speed up processing, but insert space before/after
  // them to keep word boundaries.
  $text = str_replace(array('<', '>'), array(' <', '> '), $text);
  $text = strip_tags($text, '<'. implode('><', array_keys($tags)) .'>');

  // Split HTML tags from plain text.
  $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting $null as required).

  $tag = FALSE; // Odd/even counter. Tag or no tag.
  $link = FALSE; // State variable for link analyser
  $score = 1; // Starting score per word
  $accum = ' '; // Accumulator for cleaned up data
  $tagstack = array(); // Stack with open tags
  $tagwords = 0; // Counter for consecutive words
  $focus = 1; // Focus state

  $results = array(0 => array()); // Accumulator for words for index

  foreach ($split as $value) {
    if ($tag) {
      // Increase or decrease score per word based on tag
      list($tagname) = explode(' ', $value, 2);
      $tagname = drupal_strtolower($tagname);
      // Closing or opening tag? substr() is used instead of $tagname[0] so
      // that an empty tag name does not cause a string offset error.
      if (substr($tagname, 0, 1) == '/') {
        $tagname = substr($tagname, 1);
        // If we encounter unexpected tags, reset score to avoid incorrect boosting.
        if (!count($tagstack) || $tagstack[0] != $tagname) {
          $tagstack = array();
          $score = 1;
        }
        else {
          // Remove from tag stack and decrement score. Names that are not in
          // $tags (e.g. 'b/') carry no multiplier.
          $closed = array_shift($tagstack);
          $score = max(1, $score - (isset($tags[$closed]) ? $tags[$closed] : 0));
        }
        if ($tagname == 'a') {
          $link = FALSE;
        }
      }
      else {
        if (isset($tagstack[0]) && $tagstack[0] == $tagname) {
          // None of the tags we look for make sense when nested identically.
          // If they are, it's probably broken HTML.
          $tagstack = array();
          $score = 1;
        }
        else {
          // Add to open tag stack and increment score. Names that are not in
          // $tags carry no multiplier.
          array_unshift($tagstack, $tagname);
          $score += isset($tags[$tagname]) ? $tags[$tagname] : 0;
        }
        if ($tagname == 'a') {
          // Check if link points to a node on this site
          if (preg_match($node_regexp, $value, $match)) {
            $path = drupal_get_normal_path($match[1]);
            if (preg_match('!(?:node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
              $linknid = $match[1];
              if ($linknid > 0) {
                // Note: ignore links to uncachable nodes to avoid redirect bugs.
                $node = db_fetch_object(db_query('SELECT n.title, n.nid, n.vid, r.format FROM {node} n INNER JOIN {node_revisions} r ON n.vid = r.vid WHERE n.nid = %d', $linknid));
                // $node is FALSE when the link points to a node that does not
                // exist; such links are treated as ordinary text.
                if ($node && filter_format_allowcache($node->format)) {
                  $link = TRUE;
                  $linktitle = $node->title;
                }
              }
            }
          }
        }
      }
      // A tag change occurred, reset counter.
      $tagwords = 0;
    }
    else {
      // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
      if ($value != '') {
        if ($link) {
          // Check to see if the node link text is its URL. If so, we use the target node title instead.
          if (preg_match('!^https?://!i', $value)) {
            $value = $linktitle;
          }
        }
        $words = search_index_split($value);
        foreach ($words as $word) {
          // Add word to accumulator
          $accum .= $word .' ';
          $num = is_numeric($word);
          // Check wordlength
          if ($num || drupal_strlen($word) >= $minimum_word_size) {
            // Normalize numbers
            if ($num) {
              $word = (int)ltrim($word, '-0');
            }

            // Links score mainly for the target.
            if ($link) {
              if (!isset($results[$linknid])) {
                $results[$linknid] = array();
              }
              $results[$linknid][] = $word;
              // Reduce score of the link caption in the source.
              $focus *= 0.2;
            }
            // Fall-through
            if (!isset($results[0][$word])) {
              $results[0][$word] = 0;
            }
            $results[0][$word] += $score * $focus;

            // Focus is a decaying value in terms of the amount of unique words up to this point.
            // From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words.
            $focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015));
          }
          $tagwords++;
          // Too many words inside a single tag probably mean a tag was accidentally left open.
          if (count($tagstack) && $tagwords >= 15) {
            $tagstack = array();
            $score = 1;
          }
        }
      }
    }
    $tag = !$tag;
  }

  // Drop the item's old dataset and index rows but keep its link rows, which
  // are reconciled below.
  search_wipe($sid, $type, TRUE);

  // Insert cleaned up data into dataset
  db_query("INSERT INTO {search_dataset} (sid, type, data, reindex) VALUES (%d, '%s', '%s', %d)", $sid, $type, $accum, 0);

  // Insert results into search index
  foreach ($results[0] as $word => $score) {
    db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %f)", $word, $sid, $type, $score);
    search_dirty($word);
  }
  // What remains in $results is keyed by linked node id.
  unset($results[0]);

  // Get all previous links from this item.
  $result = db_query("SELECT nid, caption FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type);
  $links = array();
  while ($link = db_fetch_object($result)) {
    $links[$link->nid] = $link->caption;
  }

  // Now store links to nodes.
  foreach ($results as $nid => $words) {
    $caption = implode(' ', $words);
    if (isset($links[$nid])) {
      if ($links[$nid] != $caption) {
        // Update the existing link and mark the node for reindexing.
        db_query("UPDATE {search_node_links} SET caption = '%s' WHERE sid = %d AND type = '%s' AND nid = %d", $caption, $sid, $type, $nid);
        search_touch_node($nid);
      }
      // Unset the link to mark it as processed.
      unset($links[$nid]);
    }
    else {
      // Insert the new link and mark the node for reindexing.
      db_query("INSERT INTO {search_node_links} (caption, sid, type, nid) VALUES ('%s', %d, '%s', %d)", $caption, $sid, $type, $nid);
      search_touch_node($nid);
    }
  }
  // Any left-over links in $links no longer exist. Delete them and mark the nodes for reindexing.
  foreach ($links as $nid => $caption) {
    db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s' AND nid = %d", $sid, $type, $nid);
    search_touch_node($nid);
  }
}

/**
 * Flags a node for reindexing.
 *
 * Writes the current time into the 'reindex' column of the node's
 * {search_dataset} row.
 *
 * @param $nid
 *   The nid of the node that needs reindexing.
 */
function search_touch_node($nid) {
  $now = time();
  db_query("UPDATE {search_dataset} SET reindex = %d WHERE sid = %d AND type = 'node'", $now, $nid);
}

/**
 * Implementation of hook_nodeapi().
 *
 * @param $node
 *   The node being acted on; only its nid is read.
 * @param $op
 *   'update index' and 'update' are handled; other operations are ignored.
 * @param $teaser
 *   Not used by this implementation.
 * @param $page
 *   Not used by this implementation.
 * @return
 *   For 'update index', an <a> element that wraps the captions of all links
 *   pointing to this node. Nothing otherwise.
 */
function search_nodeapi(&$node, $op, $teaser = NULL, $page = NULL) {
  // Transplant links to a node into the target node.
  if ($op == 'update index') {
    $captions = array();
    $rows = db_query("SELECT caption FROM {search_node_links} WHERE nid = %d", $node->nid);
    while ($row = db_fetch_object($rows)) {
      $captions[] = $row->caption;
    }
    return '<a>('. implode(', ', $captions) .')</a>';
  }

  // Reindex the node when it is updated. A newly added node needs no such
  // flag: it is indexed simply by being present in the node table.
  if ($op == 'update') {
    search_touch_node($node->nid);
  }
}

/**
 * Implementation of hook_comment().
 *
 * Flags the commented node for reindexing whenever one of its comments is
 * added, changed, deleted, published or unpublished.
 *
 * @param $a1
 *   The comment, either as an array or as an object; its nid is read.
 * @param $op
 *   The comment operation.
 */
function search_comment($a1, $op) {
  $reindex_operations = array('insert', 'update', 'delete', 'publish', 'unpublish');
  if (in_array($op, $reindex_operations)) {
    $nid = is_array($a1) ? $a1['nid'] : $a1->nid;
    search_touch_node($nid);
  }
}

658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
/**
 * Extract a module-specific search option from a search query. e.g. 'type:book'
 *
 * @param $keys
 *   The search query string.
 * @param $option
 *   The option name to look for, matched case-insensitively and literally.
 * @return
 *   The value that follows "$option:" (possibly an empty string), or NULL
 *   when the option does not occur in the query.
 */
function search_query_extract($keys, $option) {
  // Quote the option name so that characters such as '.' or '/' are matched
  // literally instead of being interpreted as regular expression syntax.
  $pattern = '/(^| )'. preg_quote($option, '/') .':([^ ]*)( |$)/i';
  if (preg_match($pattern, $keys, $matches)) {
    return $matches[2];
  }
  return NULL;
}

/**
 * Return a query with the given module-specific search option inserted in.
 * e.g. 'type:book'.
 *
 * Any existing occurrence of the option is removed first, so the returned
 * query contains the option at most once.
 *
 * @param $keys
 *   The search query string.
 * @param $option
 *   The option name, matched case-insensitively and literally.
 * @param $value
 *   (optional) The value to set. When empty, the option is only removed.
 * @return
 *   The modified query string.
 */
function search_query_insert($keys, $option, $value = '') {
  // Test for the presence of the option itself rather than for a truthy
  // value: 'type:0' and 'type:' must be replaced too. The option name is
  // quoted so that it is matched literally.
  $pattern = '/(^| )'. preg_quote($option, '/') .':[^ ]*/i';
  if (preg_match($pattern, $keys)) {
    $keys = trim(preg_replace($pattern, '', $keys));
  }
  if ($value != '') {
    $keys .= ' '. $option .':'. $value;
  }
  return $keys;
}

/**
 * Parse a search query into SQL conditions.
 *
 * We build two queries that match the dataset bodies. See do_search() for
 * more about these.
 *
 * @param $text
 *   The search keys.
 * @return
 *   NULL when $text contains no tokens at all. Otherwise a list of seven
 *   elements:
 *    * A series of statements AND'd together which will be used to provide all
 *      possible matches.
 *    * Arguments for this query part.
 *    * A series of exact word matches OR'd together.
 *    * Arguments for this query part.
 *    * The number of word matches an item needs in order to qualify: one per
 *      AND'd scoreword plus one per OR group that contains a scoreword.
 *    * A bool indicating whether this is a simple query or not. Negative
 *      terms, phrases, and presence of both AND / OR make this FALSE.
 *    * A string holding the last 'or', 'AND' or 'and' token that was seen, or
 *      '' if there was none. A lowercase 'or' suggests the user may have
 *      wanted to use OR.
 */
function search_parse_query($text) {
  $keys = array('positive' => array(), 'negative' => array());

  // Tokenize query string
  preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' '. $text, $matches, PREG_SET_ORDER);

  if (count($matches) < 1) {
    return NULL;
  }

  // Classify tokens
  $or = FALSE;
  $warning = '';
  $simple = TRUE;
  foreach ($matches as $match) {
    $phrase = FALSE;
    // Strip off phrase quotes. The tokenizer guarantees a non-empty token, so
    // offset 0 always exists.
    if ($match[2][0] == '"') {
      $match[2] = substr($match[2], 1, -1);
      $phrase = TRUE;
      $simple = FALSE;
    }
    // Simplify keyword according to indexing rules and external preprocessors
    $words = search_simplify($match[2]);
    // Re-explode in case simplification added more words, except when matching a phrase
    $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);
    // Negative matches
    if ($match[1] == '-') {
      $keys['negative'] = array_merge($keys['negative'], $words);
    }
    // OR operator: instead of a single keyword, we store an array of all
    // OR'd keywords.
    elseif ($match[2] == 'OR' && count($keys['positive'])) {
      $last = array_pop($keys['positive']);
      // Starting a new OR?
      if (!is_array($last)) {
        $last = array($last);
      }
      $keys['positive'][] = $last;
      $or = TRUE;
      continue;
    }
    // AND operator: implied, so just ignore it
    elseif ($match[2] == 'AND' || $match[2] == 'and') {
      $warning = $match[2];
      continue;
    }

    // Plain keyword
    else {
      if ($match[2] == 'or') {
        $warning = $match[2];
      }
      if ($or) {
        // Add to last element (which is an array)
        $keys['positive'][count($keys['positive']) - 1] = array_merge($keys['positive'][count($keys['positive']) - 1], $words);
      }
      else {
        $keys['positive'] = array_merge($keys['positive'], $words);
      }
    }
    $or = FALSE;
  }

  // Convert keywords into SQL statements.
  $query = array();
  $arguments = array();
  $arguments2 = array();
  $matches = 0;
  $simple_and = FALSE;
  $simple_or = FALSE;
  // Positive matches
  foreach ($keys['positive'] as $key) {
    // Group of ORed terms
    if (is_array($key) && count($key)) {
      $simple_or = TRUE;
      $queryor = array();
      $any = FALSE;
      foreach ($key as $term) {
        list($q, $count) = _search_parse_query($term, $arguments2);
        $any |= $count;
        if ($q) {
          $queryor[] = $q;
          $arguments[] = $term;
        }
      }
      if (count($queryor)) {
        $query[] = '('. implode(' OR ', $queryor) .')';
        // A group of OR keywords only needs to match once
        $matches += ($any > 0);
      }
    }
    // Single ANDed term
    else {
      $simple_and = TRUE;
      list($q, $count) = _search_parse_query($key, $arguments2);
      if ($q) {
        $query[] = $q;
        $arguments[] = $key;
        // Each AND keyword needs to match at least once
        $matches += $count;
      }
    }
  }
  if ($simple_and && $simple_or) {
    $simple = FALSE;
  }
  // Negative matches
  foreach ($keys['negative'] as $key) {
    list($q) = _search_parse_query($key, $arguments2, TRUE);
    if ($q) {
      $query[] = $q;
      $arguments[] = $key;
      $simple = FALSE;
    }
  }
  $query = implode(' AND ', $query);

  // Build word-index conditions for the first pass
  $query2 = substr(str_repeat("i.word = '%s' OR ", count($arguments2)), 0, -4);

  return array($query, $arguments, $query2, $arguments2, $matches, $simple, $warning);
}

/**
 * Helper function for search_parse_query().
 *
 * Builds the search_dataset condition for one keyword or phrase and, for
 * positive matches, registers the scorewords it contains.
 *
 * @param $word
 *   The keyword or phrase. Taken by reference but not modified here.
 * @param $scores
 *   Array of scorewords collected so far, keyed by the word itself. New
 *   scorewords found in $word are added to it. Words that are not numeric and
 *   are shorter than the 'minimum_word_size' variable are skipped; numeric
 *   words are stored as integers.
 * @param $not
 *   TRUE to build a negative (NOT LIKE) condition. No scorewords are
 *   registered in that case.
 * @return
 *   A list of two elements: the SQL condition snippet, containing a single
 *   %s placeholder for the keyword, and the number of scorewords that were
 *   newly added to $scores.
 */
function _search_parse_query(&$word, &$scores, $not = FALSE) {
  $count = 0;
  // Determine the scorewords of this word/phrase
  if (!$not) {
    $split = explode(' ', $word);
    foreach ($split as $s) {
      $num = is_numeric($s);
      if ($num || drupal_strlen($s) >= variable_get('minimum_word_size', 3)) {
        // Numbers are reduced to a plain integer: sign and leading zeros are
        // dropped.
        $s = $num ? ((int)ltrim($s, '-0')) : $s;
        if (!isset($scores[$s])) {
          $scores[$s] = $s;
          $count++;
        }
      }
    }
  }
  // Return matching snippet and number of added words
  return array("d.data ". ($not ? 'NOT ' : '') ."LIKE '%% %s %%'", $count);
}

Kjartan's avatar
Kjartan committed
849
/**
 * Do a query on the full-text search index for a word or words.
 *
 * This function is normally only called by each module that supports the
 * indexed search (and thus, implements hook_update_index()).
 *
 * Results are retrieved in two logical passes. However, the two passes are
 * joined together into a single query.  And in the case of most simple
 * queries the second pass is not even used.
 *
 * The first pass selects a set of all possible matches, which has the benefit
 * of also providing the exact result set for simple "AND" or "OR" searches.
 *
 * The second portion of the query further refines this set by verifying
 * advanced text conditions (such as negative or phrase matches).
 *
 * @param $keywords
 *   A search string as entered by the user.
 *
 * @param $type
 *   A string identifying the calling module.
 *
 * @param $join1
 *   (optional) Inserted into the JOIN part of the first SQL query.
 *   For example "INNER JOIN {node} n ON n.nid = i.sid".
 *
 * @param $where1
 *   (optional) Inserted into the WHERE part of the first SQL query.
 *   For example "(n.status > %d)".
 *
 * @param $arguments1
 *   (optional) Extra SQL arguments belonging to the first query.
 *
 * @param $columns2
 *   (optional) Inserted into the SELECT part of the second query. Must contain
 *   a column selected as 'score'.
 *   Defaults to 'i.relevance AS score'.
 *
 * @param $join2
 *   (optional) Inserted into the JOIN part of the second SQL query.
 *   For example "INNER JOIN {node_comment_statistics} n ON n.nid = i.sid"
 *
 * @param $arguments2
 *   (optional) Extra SQL arguments belonging to the second query parameter.
 *
 * @param $sort_parameters
 *   (optional) SQL arguments for sorting the final results.
 *              Default: 'ORDER BY score DESC'
 *
 * @return
 *   An array of result row objects for the current pager page, each carrying
 *   'type', 'sid' and the columns given in $columns2. Empty when the query
 *   has no usable positive keyword or nothing matches.
 *
 * @ingroup search
 */
function do_search($keywords, $type, $join1 = '', $where1 = '1', $arguments1 = array(), $columns2 = 'i.relevance AS score', $join2 = '', $arguments2 = array(), $sort_parameters = 'ORDER BY score DESC') {
  $query = search_parse_query($keywords);

  // search_parse_query() returns NULL for a query without any tokens; test
  // for that first so we never index into NULL.
  if ($query === NULL || $query[2] == '') {
    form_set_error('keys', t('You must include at least one positive keyword with @count characters or more.', array('@count' => variable_get('minimum_word_size', 3))));
  }
  if ($query !== NULL && $query[6] == 'or') {
    drupal_set_message(t('Search for either of the two terms with uppercase <strong>OR</strong>. For example, <strong>cats OR dogs</strong>.'));
  }
  if ($query === NULL || $query[0] == '' || $query[2] == '') {
    return array();
  }

  // Build query for keyword normalization.
  $conditions = "$where1 AND ($query[2]) AND i.type = '%s'";
  $arguments1 = array_merge($arguments1, $query[3], array($type));
  $join = "INNER JOIN {search_total} t ON i.word = t.word $join1";
  // Non-simple queries also need the dataset conditions (second pass).
  if (!$query[5]) {
    $conditions .= " AND ($query[0])";
    $arguments1 = array_merge($arguments1, $query[1]);
    $join .= " INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type";
  }

  // Calculate maximum keyword relevance, to normalize it.
  $select = "SELECT MAX(i.score * t.count) FROM {search_index} i $join WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d";
  $arguments = array_merge($arguments1, array($query[4]));
  $normalize = db_result(db_query($select, $arguments));
  if (!$normalize) {
    return array();
  }
  $columns2 = str_replace('i.relevance', '('. (1.0 / $normalize) .' * SUM(i.score * t.count))', $columns2);

  // Build query to retrieve results.
  $select = "SELECT i.type, i.sid, $columns2 FROM {search_index} i $join $join2 WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d";
  $count_select =  "SELECT COUNT(*) FROM ($select) n1";
  $arguments = array_merge($arguments2, $arguments1, array($query[4]));

  // Do actual search query
  $result = pager_query("$select $sort_parameters", 10, 0, $count_select, $arguments);
  $results = array();
  while ($item = db_fetch_object($result)) {
    $results[] = $item;
  }
  return $results;
}

951
952
953
954
/**
 * Helper function for grabbing search keys.
 *
 * The result is computed on the first call and cached in a static variable
 * for all later calls.
 *
 * @return
 *   The third and following components of the path in $_GET['q'] when there
 *   are any; otherwise the 'keys' request parameter, or '' when that is empty.
 */
function search_get_keys() {
  static $return;
  if (!isset($return)) {
    // Extract keys as remainder of path
    // Note: support old GET format of searches for existing links.
    $q = isset($_GET['q']) ? $_GET['q'] : '';
    $path = explode('/', $q, 3);
    $keys = empty($_REQUEST['keys']) ? '' : $_REQUEST['keys'];
    $return = count($path) == 3 ? $path[2] : $keys;
  }
  return $return;
}

966
967
968
969
970
971
972
973
/**
 * @defgroup search Search interface
 * @{
 * The Drupal search interface manages a global search mechanism.
 *
 * Modules may plug into this system to provide searches of different types of
 * data. Most of the system is handled by search.module, so this must be enabled
 * for all of the search features to work.
 *
 * There are three ways to interact with the search system:
 * - Specifically for searching nodes, you can implement nodeapi('update index')
 *   and nodeapi('search result'). However, note that the search system already
 *   indexes all visible output of a node, i.e. everything displayed normally
 *   by hook_view() and hook_nodeapi('view'). This is usually sufficient.
 *   You should only use this mechanism if you want additional, non-visible data
 *   to be indexed.
 * - Implement hook_search(). This will create a search tab for your module on
 *   the /search page with a simple keyword search form. You may optionally
 *   implement hook_search_item() to customize the display of your results.
 * - Implement hook_update_index(). This allows your module to use Drupal's
 *   HTML indexing mechanism for searching full text efficiently.
 *
 * If your module needs to provide a more complicated search form, then you need
 * to implement it yourself without hook_search(). In that case, you should
 * define it as a local task (tab) under the /search page (e.g. /search/mymodule)
 * so that users can easily find it.
 */

/**
 * Render a search form.
 *
 * @param $form_state
 *   The form state; not used by this builder.
 * @param $action
 *   Form action. When empty, defaults to the URL of 'search/' followed by
 *   $type.
 * @param $keys
 *   The search string entered by the user, containing keywords for the search.
 * @param $type
 *   The type of search to render the form for. Must be the name of a module
 *   which implements hook_search().
 * @param $prompt
 *   A piece of text to put before the form (e.g. "Enter your keywords").
 *   When NULL, 'Enter your keywords' is used. Passing an empty value gives a
 *   narrower keyword field.
 * @return
 *   A form array describing the search form.
 */
function search_form(&$form_state, $action = '', $keys = '', $type = NULL, $prompt = NULL) {

  // Add CSS
  drupal_add_css(drupal_get_path('module', 'search') .'/search.css', 'module', 'all', FALSE);

  if (!$action) {
    $action = url('search/'. $type);
  }
  if (is_null($prompt)) {
    $prompt = t('Enter your keywords');
  }

  $form = array(
    '#action' => $action,
    '#attributes' => array('class' => 'search-form'),
  );
  $form['module'] = array('#type' => 'value', '#value' => $type);
  $form['basic'] = array('#type' => 'item', '#title' => $prompt);
  $form['basic']['inline'] = array('#prefix' => '<div class="container-inline">', '#suffix' => '</div>');
  $form['basic']['inline']['keys'] = array(
    '#type' => 'textfield',
    '#title' => '',
    '#default_value' => $keys,
    '#size' => $prompt ? 40 : 20,
    '#maxlength' => 255,
  );
  // processed_keys is used to coordinate keyword passing between other forms
  // that hook into the basic search form.
  $form['basic']['inline']['processed_keys'] = array('#type' => 'value', '#value' => array());
  $form['basic']['inline']['submit'] = array('#type' => 'submit', '#value' => t('Search'));

  return $form;
}

/**
1044
1045
1046
1047
1048
 * Form builder; Output a search form for the search block and the theme's search box.
 *
 * @ingroup forms
 * @see search_box_form_submit().
 * @see theme_search_box_form().
1049
 */
1050
function search_box(&$form_state, $form_id) {
1051
  $form[$form_id] = array(
1052
    '#title' => t('Search this site'),