- #41897: Dead variable (remove_short)
- #39117: Fix Chinese search problem
- Fix bug with AND and OR queries
- Add smarter highlighting for CJK strings
- Add a message informing the user about the minimum word length
- Improve code comments
2 merge requests: !7452 (Issue #1797438. HTML5 validation is preventing form submit and not fully...), !789 (Issue #3210310: Adjust Database API to remove deprecated Drupal 9 code in Drupal 10)
$form['indexing_settings']['info']=array('#type'=>'markup','#value'=>'<em>'.t('<p>Changing the settings below will cause the site index to be rebuilt. The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed.</p><p>The default settings should be appropriate for the majority of sites.</p>').'</em>');
$form['indexing_settings']['info']=array('#type'=>'markup','#value'=>'<em>'.t('<p>Changing the settings below will cause the site index to be rebuilt. The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed.</p><p>The default settings should be appropriate for the majority of sites.</p>').'</em>');
$form['indexing_settings']['minimum_word_size']=array('#type'=>'textfield','#title'=>t('Minimum word length to index'),'#default_value'=>variable_get('minimum_word_size',3),'#size'=>5,'#maxlength'=>3,'#description'=>t('The number of characters a word has to be to be indexed. A lower setting means better search result ranking, but also a larger database. Each search query must contain at least one keyword that is this size (or longer).'));
$form['indexing_settings']['minimum_word_size']=array('#type'=>'textfield','#title'=>t('Minimum word length to index'),'#default_value'=>variable_get('minimum_word_size',3),'#size'=>5,'#maxlength'=>3,'#description'=>t('The number of characters a word has to be to be indexed. A lower setting means better search result ranking, but also a larger database. Each search query must contain at least one keyword that is this size (or longer).'));
$form['indexing_settings']['remove_short']=array('#type'=>'textfield','#title'=>t('Minimum word length to search for'),'#default_value'=>variable_get('remove_short',3),'#size'=>5,'#maxlength'=>3,'#description'=>t('The number of characters a word has to be to be searched for, including wildcard characters.'));
$form['indexing_settings']['overlap_cjk']=array('#type'=>'checkbox','#title'=>t('Simple CJK handling'),'#default_value'=>variable_get('overlap_cjk',true),'#description'=>t('Whether to apply a simple Chinese/Japanese/Korean tokenizer based on overlapping sequences. Turn this off if you want to use an external preprocessor for this instead. Does not affect other languages.'));
$form['indexing_settings']['overlap_cjk']=array('#type'=>'checkbox','#title'=>t('Simple CJK handling'),'#default_value'=>variable_get('overlap_cjk',true),'#description'=>t('Whether to apply a simple Chinese/Japanese/Korean tokenizer based on overlapping sequences. Turn this off if you want to use an external preprocessor for this instead. Does not affect other languages.'));
// Per module settings
// Per module settings
...
@@ -364,7 +363,7 @@ function search_expand_cjk($matches) {
...
@@ -364,7 +363,7 @@ function search_expand_cjk($matches) {
$l=drupal_strlen($str);
$l=drupal_strlen($str);
// Passthrough short words
// Passthrough short words
if($l<=$min){
if($l<=$min){
return$str;
return' '.$str.' ';
}
}
$tokens=' ';
$tokens=' ';
// FIFO queue of characters
// FIFO queue of characters
...
@@ -640,15 +639,15 @@ function search_parse_query($text) {
...
@@ -640,15 +639,15 @@ function search_parse_query($text) {
$or=false;
$or=false;
foreach($matchesas$match){
foreach($matchesas$match){
$phrase=false;
$phrase=false;
// Strip off quotes
// Strip off phrase quotes
if($match[2]{0}=='"'){
if($match[2]{0}=='"'){
$match[2]=substr($match[2],1,-1);
$match[2]=substr($match[2],1,-1);
$phrase=true;
$phrase=true;
}
}
// Simplify keyword according to indexing rules
// Simplify keyword according to indexing rules and external preprocessors
$words=search_simplify($match[2]);
$words=search_simplify($match[2]);
// Re-explode in case simplification added more words, except when matching a phrase
// Re-explode in case simplification added more words, except when matching a phrase
functiondo_search($keywords,$type,$join1='',$where1='1',$arguments1=array(),$select2='i.relevance AS score',$join2='',$arguments2=array()){
functiondo_search($keywords,$type,$join1='',$where1='1',$arguments1=array(),$select2='i.relevance AS score',$join2='',$arguments2=array()){
$query=search_parse_query($keywords);
$query=search_parse_query($keywords);
if($query[2]==''){
form_set_error('keys',t('You must include at least one positive keyword with %count characters or more.',array('%count'=>variable_get('minimum_word_size',3))));
}
if($query===NULL||$query[0]==''||$query[2]==''){
if($query===NULL||$query[0]==''||$query[2]==''){
returnarray();
returnarray();
}
}
...
@@ -808,7 +829,7 @@ function do_search($keywords, $type, $join1 = '', $where1 = '1', $arguments1 = a
...
@@ -808,7 +829,7 @@ function do_search($keywords, $type, $join1 = '', $where1 = '1', $arguments1 = a
// First pass: select all possible matching sids, doing a simple index-based OR matching on the keywords.
// First pass: select all possible matching sids, doing a simple index-based OR matching on the keywords.
// 'matches' is used to reject those items that cannot possibly match the query.
// 'matches' is used to reject those items that cannot possibly match the query.
$conditions=$where1.' AND ('.$query[2].") AND i.type = '%s'";
$conditions=$where1.' AND ('.$query[2].") AND i.type = '%s'";
$result=db_query_temporary("SELECT i.type, i.sid, SUM(i.score * t.count) AS relevance, COUNT(*) AS matches FROM {search_index} i INNER JOIN {search_total} t ON i.word = t.word $join1 WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d",$arguments,'temp_search_sids');
$result=db_query_temporary("SELECT i.type, i.sid, SUM(i.score * t.count) AS relevance, COUNT(*) AS matches FROM {search_index} i INNER JOIN {search_total} t ON i.word = t.word $join1 WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d",$arguments,'temp_search_sids');
$form['indexing_settings']['info']=array('#type'=>'markup','#value'=>'<em>'.t('<p>Changing the settings below will cause the site index to be rebuilt. The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed.</p><p>The default settings should be appropriate for the majority of sites.</p>').'</em>');
$form['indexing_settings']['info']=array('#type'=>'markup','#value'=>'<em>'.t('<p>Changing the settings below will cause the site index to be rebuilt. The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed.</p><p>The default settings should be appropriate for the majority of sites.</p>').'</em>');
$form['indexing_settings']['minimum_word_size']=array('#type'=>'textfield','#title'=>t('Minimum word length to index'),'#default_value'=>variable_get('minimum_word_size',3),'#size'=>5,'#maxlength'=>3,'#description'=>t('The number of characters a word has to be to be indexed. A lower setting means better search result ranking, but also a larger database. Each search query must contain at least one keyword that is this size (or longer).'));
$form['indexing_settings']['minimum_word_size']=array('#type'=>'textfield','#title'=>t('Minimum word length to index'),'#default_value'=>variable_get('minimum_word_size',3),'#size'=>5,'#maxlength'=>3,'#description'=>t('The number of characters a word has to be to be indexed. A lower setting means better search result ranking, but also a larger database. Each search query must contain at least one keyword that is this size (or longer).'));
$form['indexing_settings']['remove_short']=array('#type'=>'textfield','#title'=>t('Minimum word length to search for'),'#default_value'=>variable_get('remove_short',3),'#size'=>5,'#maxlength'=>3,'#description'=>t('The number of characters a word has to be to be searched for, including wildcard characters.'));
$form['indexing_settings']['overlap_cjk']=array('#type'=>'checkbox','#title'=>t('Simple CJK handling'),'#default_value'=>variable_get('overlap_cjk',true),'#description'=>t('Whether to apply a simple Chinese/Japanese/Korean tokenizer based on overlapping sequences. Turn this off if you want to use an external preprocessor for this instead. Does not affect other languages.'));
$form['indexing_settings']['overlap_cjk']=array('#type'=>'checkbox','#title'=>t('Simple CJK handling'),'#default_value'=>variable_get('overlap_cjk',true),'#description'=>t('Whether to apply a simple Chinese/Japanese/Korean tokenizer based on overlapping sequences. Turn this off if you want to use an external preprocessor for this instead. Does not affect other languages.'));
// Per module settings
// Per module settings
...
@@ -364,7 +363,7 @@ function search_expand_cjk($matches) {
...
@@ -364,7 +363,7 @@ function search_expand_cjk($matches) {
$l=drupal_strlen($str);
$l=drupal_strlen($str);
// Passthrough short words
// Passthrough short words
if($l<=$min){
if($l<=$min){
return$str;
return' '.$str.' ';
}
}
$tokens=' ';
$tokens=' ';
// FIFO queue of characters
// FIFO queue of characters
...
@@ -640,15 +639,15 @@ function search_parse_query($text) {
...
@@ -640,15 +639,15 @@ function search_parse_query($text) {
$or=false;
$or=false;
foreach($matchesas$match){
foreach($matchesas$match){
$phrase=false;
$phrase=false;
// Strip off quotes
// Strip off phrase quotes
if($match[2]{0}=='"'){
if($match[2]{0}=='"'){
$match[2]=substr($match[2],1,-1);
$match[2]=substr($match[2],1,-1);
$phrase=true;
$phrase=true;
}
}
// Simplify keyword according to indexing rules
// Simplify keyword according to indexing rules and external preprocessors
$words=search_simplify($match[2]);
$words=search_simplify($match[2]);
// Re-explode in case simplification added more words, except when matching a phrase
// Re-explode in case simplification added more words, except when matching a phrase
functiondo_search($keywords,$type,$join1='',$where1='1',$arguments1=array(),$select2='i.relevance AS score',$join2='',$arguments2=array()){
functiondo_search($keywords,$type,$join1='',$where1='1',$arguments1=array(),$select2='i.relevance AS score',$join2='',$arguments2=array()){
$query=search_parse_query($keywords);
$query=search_parse_query($keywords);
if($query[2]==''){
form_set_error('keys',t('You must include at least one positive keyword with %count characters or more.',array('%count'=>variable_get('minimum_word_size',3))));
}
if($query===NULL||$query[0]==''||$query[2]==''){
if($query===NULL||$query[0]==''||$query[2]==''){
returnarray();
returnarray();
}
}
...
@@ -808,7 +829,7 @@ function do_search($keywords, $type, $join1 = '', $where1 = '1', $arguments1 = a
...
@@ -808,7 +829,7 @@ function do_search($keywords, $type, $join1 = '', $where1 = '1', $arguments1 = a
// First pass: select all possible matching sids, doing a simple index-based OR matching on the keywords.
// First pass: select all possible matching sids, doing a simple index-based OR matching on the keywords.
// 'matches' is used to reject those items that cannot possibly match the query.
// 'matches' is used to reject those items that cannot possibly match the query.
$conditions=$where1.' AND ('.$query[2].") AND i.type = '%s'";
$conditions=$where1.' AND ('.$query[2].") AND i.type = '%s'";
$result=db_query_temporary("SELECT i.type, i.sid, SUM(i.score * t.count) AS relevance, COUNT(*) AS matches FROM {search_index} i INNER JOIN {search_total} t ON i.word = t.word $join1 WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d",$arguments,'temp_search_sids');
$result=db_query_temporary("SELECT i.type, i.sid, SUM(i.score * t.count) AS relevance, COUNT(*) AS matches FROM {search_index} i INNER JOIN {search_total} t ON i.word = t.word $join1 WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d",$arguments,'temp_search_sids');