diff options
author | brettp <brettp@36083f99-b078-4883-b0ff-0f9b5a30f544> | 2009-11-07 20:57:32 +0000 |
---|---|---|
committer | brettp <brettp@36083f99-b078-4883-b0ff-0f9b5a30f544> | 2009-11-07 20:57:32 +0000 |
commit | 24e3ff747614364d0d44fc1a7644f164146c66e1 (patch) | |
tree | cab9495aa3a6daf5c1aa3bac6de61e3436e77380 /mod | |
parent | 413ea817c78a2cd7cbe3bebad1623daaaeea6394 (diff) | |
download | elgg-24e3ff747614364d0d44fc1a7644f164146c66e1.tar.gz elgg-24e3ff747614364d0d44fc1a7644f164146c66e1.tar.bz2 |
Moved default search hooks into search mod.
Using MySQL's MATCH ... AGAINST full-text search instead of LIKE comparisons for most searches.
Changed the search query parameter from 'tag' to 'q' while maintaining backward compatibility with 'tag'.
git-svn-id: http://code.elgg.org/elgg/trunk@3633 36083f99-b078-4883-b0ff-0f9b5a30f544
Diffstat (limited to 'mod')
-rw-r--r-- | mod/search/index.php | 6 | ||||
-rw-r--r-- | mod/search/search_hooks.php | 207 | ||||
-rw-r--r-- | mod/search/start.php | 300 | ||||
-rw-r--r-- | mod/search/views/default/page_elements/searchbox.php | 2 | ||||
-rw-r--r-- | mod/search/views/default/search/listing.php | 15 |
5 files changed, 434 insertions, 96 deletions
diff --git a/mod/search/index.php b/mod/search/index.php index 30f33f9f1..f9a96a60b 100644 --- a/mod/search/index.php +++ b/mod/search/index.php @@ -2,7 +2,9 @@ // $search_type == all || entities || trigger plugin hook $search_type = get_input('search_type', 'all'); -$query = sanitise_string(get_input('query', get_input('tag'))); + +// @todo there is a bug in get_input that makes variables have slashes sometimes. +$query = sanitise_string(stripslashes(get_input('q', get_input('tag', '', FALSE), FALSE))); // get limit and offset. override if on search dashboard, where only 2 // of each most recent entity types will be shown. @@ -117,7 +119,7 @@ call search */ -$layout = elgg_view_layout('single_column', '', $results_html); +$layout = elgg_view_layout('two_column_left_sidebar', '', $results_html); page_draw($title, $layout); diff --git a/mod/search/search_hooks.php b/mod/search/search_hooks.php new file mode 100644 index 000000000..f673b7512 --- /dev/null +++ b/mod/search/search_hooks.php @@ -0,0 +1,207 @@ +<?php +/** + * Elgg core search. + * + * @package Elgg + * @subpackage Core + * @author Curverider Ltd <info@elgg.com>, The MITRE Corporation <http://www.mitre.org> + * @link http://elgg.org/ + */ + +/** + * Return default results for searches on objects. + * + * @param unknown_type $hook + * @param unknown_type $type + * @param unknown_type $value + * @param unknown_type $params + * @return unknown_type + */ +function search_objects_hook($hook, $type, $value, $params) { + global $CONFIG; + + $join = "JOIN {$CONFIG->dbprefix}objects_entity oe ON e.guid = oe.guid"; + $params['joins'] = array($join); + $fields = array('title', 'description'); + + $where = search_get_where_sql('oe', $fields, $params); + + $params['wheres'] = array($where); + + //@todo allow sorting by recent time + $params['order_by'] = NULL; + + $entities = elgg_get_entities($params); + $params['count'] = TRUE; + $count = elgg_get_entities($params); + + // no need to continue if nothing here. 
+ if (!$count) { + return array('entities' => array(), 'count' => $count); + } + + // add the volatile data for why these entities have been returned. + foreach ($entities as $entity) { + //$title = search_get_highlighted_relevant_substrings($entity->title, $params['query']); + $title = search_get_relevant_substring($entity->title, $params['query'], '<strong class="searchMatch">', '</strong>'); + $entity->setVolatileData('search_matched_title', $title); + + $desc = search_get_relevant_substring($entity->description, $params['query'], '<strong class="searchMatch">', '</strong>'); + $entity->setVolatileData('search_matched_description', $desc); + } + + return array( + 'entities' => $entities, + 'count' => $count, + ); +} + +/** + * Return default results for searches on groups. + * + * @param unknown_type $hook + * @param unknown_type $type + * @param unknown_type $value + * @param unknown_type $params + * @return unknown_type + */ +function search_groups_hook($hook, $type, $value, $params) { + global $CONFIG; + + $query = $params['query']; + + $join = "JOIN {$CONFIG->dbprefix}groups_entity ge ON e.guid = ge.guid"; + $params['joins'] = array($join); + $fields = array('name', 'description'); + + $where = search_get_where_sql('ge', $fields, $params); + + $params['wheres'] = array($where); + + //@todo allow sorting by recent time + $params['order_by'] = NULL; + + $entities = elgg_get_entities($params); + $params['count'] = TRUE; + $count = elgg_get_entities($params); + + // no need to continue if nothing here. + if (!$count) { + return array('entities' => array(), 'count' => $count); + } + + // add the volatile data for why these entities have been returned. 
+ foreach ($entities as $entity) { + $description = search_get_relevant_substring($entity->description, $query, '<strong class="searchMatch">', '</strong>'); + $entity->setVolatileData('search_matched_title', $description); + + $name = search_get_relevant_substring($entity->name, $query, '<strong class="searchMatch">', '</strong>'); + $entity->setVolatileData('search_matched_description', $name); + } + + return array( + 'entities' => $entities, + 'count' => $count, + ); +} + +/** + * Return default results for searches on users. + * + * @param unknown_type $hook + * @param unknown_type $type + * @param unknown_type $value + * @param unknown_type $params + * @return unknown_type + */ +function search_users_hook($hook, $type, $value, $params) { + global $CONFIG; + + $query = $params['query']; + + $join = "JOIN {$CONFIG->dbprefix}users_entity ue ON e.guid = ue.guid"; + $params['joins'] = array($join); + + // use like here because of the simplicity of the search + $where = "(ue.guid = e.guid + AND (ue.username LIKE '%$query%' + OR ue.name LIKE '%$query%' + ) + )"; + $params['wheres'] = array($where); + + $entities = elgg_get_entities($params); + $params['count'] = TRUE; + $count = elgg_get_entities($params); + + // no need to continue if nothing here. + if (!$count) { + return array('entities' => array(), 'count' => $count); + } + + // add the volatile data for why these entities have been returned. + foreach ($entities as $entity) { + $username = search_get_relevant_substring($entity->username, $query, '<strong class="searchMatch">', '</strong>'); + $entity->setVolatileData('search_matched_title', $username); + + $name = search_get_relevant_substring($entity->name, $query, '<strong class="searchMatch">', '</strong>'); + $entity->setVolatileData('search_matched_description', $name); + } + + return array( + 'entities' => $entities, + 'count' => $count, + ); +} + +/** + * Return default results for searches on tags. 
+ * + * @param unknown_type $hook + * @param unknown_type $type + * @param unknown_type $value + * @param unknown_type $params + * @return unknown_type + */ +function search_tags_hook($hook, $type, $value, $params) { + global $CONFIG; + + $query = $params['query']; + $params['metadata_name_value_pair'] = array ('name' => 'tags', 'value' => $query, 'case_sensitive' => FALSE); + + $entities = elgg_get_entities_from_metadata($params); + $params['count'] = TRUE; + $count = elgg_get_entities_from_metadata($params); + + // no need to continue if nothing here. + if (!$count) { + return array('entities' => array(), 'count' => $count); + } + + // add the volatile data for why these entities have been returned. + foreach ($entities as $entity) { + $tags = implode(',', $entity->tags); + $tags_str = search_get_relevant_substring($tags, $query, '<strong class="searchMatch">', '</strong>'); + $entity->setVolatileData('search_matched_tags', $tags_str); + } + + return array( + 'entities' => $entities, + 'count' => $count, + ); +} + +/** + * Register tags as a custom search type. + * + * @param unknown_type $hook + * @param unknown_type $type + * @param unknown_type $value + * @param unknown_type $params + * @return unknown_type + */ +function search_custom_types_tags_hook($hook, $type, $value, $params) { + $value[] = 'tags'; + return $value; +} + diff --git a/mod/search/start.php b/mod/search/start.php index 4bd342285..47405450a 100644 --- a/mod/search/start.php +++ b/mod/search/start.php @@ -1,5 +1,4 @@ <?php - /** * Elgg core search. 
* @@ -14,9 +13,31 @@ * */ function search_init() { + global $CONFIG; + require_once 'search_hooks.php'; + // page handler for search actions and results register_page_handler('search','search_page_handler'); + // register some default search hooks + register_plugin_hook('search', 'object', 'search_objects_hook'); + register_plugin_hook('search', 'user', 'search_users_hook'); + + // @todo pull this out into groups + register_plugin_hook('search', 'group', 'search_groups_hook'); + + // tags are a bit different. + // register a custom search type and a hook for that. + register_plugin_hook('search_types', 'get_types', 'search_custom_types_tags_hook'); + register_plugin_hook('search', 'tags', 'search_tags_hook'); + + // get server min and max allowed chars for ft searching + $word_lens = get_data('SELECT @@ft_min_word_len as min, @@ft_max_word_len as max'); + + $CONFIG->search_info = array(); + $CONFIG->search_info['min_chars'] = $word_lens[0]->min; + $CONFIG->search_info['max_chars'] = $word_lens[0]->max; + // add in CSS for search elements extend_view('css', 'search/css'); } @@ -29,114 +50,151 @@ function search_init() { function search_page_handler($page) { global $CONFIG; - if(!get_input('tag')) { - set_input('tag', $page[0]); + // if there is no q set, we're being called from a legacy installation + // it expects a search by tags. + // actually it doesn't, but maybe it should. + // maintain backward compatibility + if(!get_input('q', get_input('tag', NULL))) { + set_input('q', $page[0]); + //set_input('search_type', 'tags'); } - include_once($CONFIG->path . "mod/search/index.php"); + include_once('index.php'); } /** - * Core search hook. - * Returns an object with two parts: - * ->entities: an array of instantiated entities that have been decorated with - * volatile "search" data indicating what they matched. These are - * the entities to be displayed to the user on this page. - * ->total: total number of entities overall. 
This function can update this - * limit to ask for more pages in the pagination. + * Return a string with highlighted matched elements. + * Checks for "s + * Provides context for matched elements. + * Will not return more than $max_length of full context. + * Only highlights words + * + * @param unknown_type $haystack + * @param unknown_type $need + * @param unknown_type $context + * @param unknown_type $max_length + * @return unknown_type */ -function search_original_hook($hook, $type, $returnvalue, $params) { - global $CONFIG; +function search_get_highlighted_relevant_substrings($haystack, $needle, $min_match_context = 15, $max_length = 250) { + $haystack = strip_tags($haystack); + $haystack_lc = strtolower($haystack); + + // for now don't worry about "s or boolean operators + $needle = str_replace(array('"', '-', '+', '~'), '', stripslashes(strip_tags($needle))); + $words = explode(' ', $needle); + + $min_chars = $CONFIG->search_info['min_chars']; + // if > ft_min_word == not running in literal mode. 
+ if ($needle >= $min_chars) { + // clean out any words that are ignored by mysql + foreach ($words as $i => $word) { + if (strlen($word) < $min_chars) { + unset ($words[$i]); + } + } + } - var_dump($CONFIG->hooks); - - $tag = $params['tag']; - $offset = $params['offset']; // starting page - $limit = $params['limit']; // number per page - $searchtype = $params['searchtype']; // the search type we're looking for - $object_type = $params['object_type']; - $subtype = $params['subtype']; - $owner_guid = $params['owner_guid']; - $tagtype = $params['tagtype']; - - $count = get_entities_from_metadata($tagtype, elgg_strtolower($tag), $object_type, $subtype, $owner_guid, $limit, $offset, "", 0, TRUE, FALSE); - $ents = get_entities_from_metadata($tagtype, elgg_strtolower($tag), $object_type, $subtype, $owner_guid, $limit, $offset, "", 0, FALSE, FALSE); - -// $options = array( -// 'metadata_name_value_pair' => array('name' => $params['tagtype'], 'value' => $params['tag'], 'case_sensitive' => false), -// 'offset' => $params['offset'], -// 'limit' => $params['limit'], -// 'type' => $params['object_type'], -// 'subtype' => $params['subtype'], -// 'owner_guid' => $params['owner_guid'] -// ); -// -// $count = elgg_get_entities_from_metadata(array_merge($options, array('count' => TRUE))); -// $entities = elgg_get_entities_from_metadata($options); - - /* - * Foreach entity - * get the metadata keys - * If the value matches, hang onto the key - * add all the matched keys to VolatileData - * This tells us *why* each entity matched - */ - foreach ($ents as $ent) { - $metadata = get_metadata_for_entity($ent->getGUID()); - $matched = array(); - if ($metadata) { - foreach ($metadata as $tuple) { - if ($tag === $tuple->value) { - // This is one of the matching elements - $matched[] = $tuple->name; - } + $substr_counts = array(); + $str_pos = array(); + // get the full count of matches. 
+ foreach ($words as $word) { + $word = strtolower($word); + $count = substr_count($haystack, $word); + $word_len = strlen($word); + + // find the start positions for the words + // get the context for words based upon + if ($count > 1) { + $str_pos[$word] = array(); + $offset = 0; + while (FALSE !== $pos = strpos($haystack, $word, $offset)) { + $str_pos[$word][] = $pos; + $offset += $pos + $word_len; } - $ent->setVolatileData('search', $matched); + } else { + $str_pos[$word] = array(strpos($haystack, $word)); } + $substr_counts[$word] = $count; } - // merge in our entities with any coming in from elsewhere - $returnvalue->entities = array_merge($returnvalue->entities, $ents); +//A test with multiple words and now more in the subject too because words need to be everywhere - // expand the total entity count if necessary - if ($count > $returnvalue->total) { - $returnvalue->total = $count; - } + // sort by order of occurence + krsort($substr_counts); + $full_count = array_sum($substr_counts); - return $returnvalue; -} -/** - * Provides default search for registered entity subtypes. - * Entity types should be dealt with in the entity classes. (Objects are an exception). - * - * @param unknown_type $hook - * @param unknown_type $type - * @param unknown_type $returnvalue - * @param unknown_type $params - * @return unknown_type - */ -function search_registered_entities($hook, $type, $returnvalue, $params) { - $entity_types = get_registered_entity_types(); - foreach ($entity_types as $type => $subtypes) { - if (is_array($subtypes) && count($subtypes)) { - } + + // get full number of matches against all words to see how many we actually want to look at. 
+ + + + +// $desc = search_get_relevant_substring($entity->description, $params['query'], '<strong class="searchMatch">', '</strong>'); + + + $params['query']; + // "this is"just a test "silly person" + + // check for "s + $words_quotes = explode('"', $needle); + + $words_orig = explode(' ', $needle); + $words = array(); + + foreach ($words_orig as $i => $word) { + // figure out if we have a special operand + $operand = substr($word, 0, 1); + switch($operand) { + case '"': + // find the matching " if any. else, remove the " + if (substr_count($query, '"') < 2) { + $words[] = substr($word, 1); + } else { + $word = substr($word, 1); + $word_i = $i; + while ('"' != strpos($words_orig[$word_i], '"')) { + $word .= " {$words_orig[$word_i]}"; + unset($words_orig[$word_i]); + } + + + } + + break; + + case '+': + // remove + + $words[] = substr($word, 1); + break; + + case '~': + case '-': + // remove this from highlighted list. + + break; + } } -} -/** - * return our base search types (right now, we have none) - */ -function search_base_search_types_hook($hook, $type, $returnvalue, $params) { - if (!is_array($returnvalue)) { - $returnvalue = array(); + // pick out " queries + if (substr_count($query, '"') >= 2) { + } - return $returnvalue; -} + // ignore queries starting with - + + // @todo figure out a way to "center" the matches within the max_length. 
+ // if only one match, its context is $context + $max_length / 2 + // if 2 matches, its context is $context + $max_length / 4 + // if 3 matches, its context is $context + $max_length / 6 + // $context per match = $min_match_context + ($max_length / $num_count_match) + + // if $max_length / ($matched_count * 2) < $context + // only match against the first X matches where $context >= $context +} /** * Returns a matching string with $context amount of context, optionally @@ -148,7 +206,7 @@ function search_base_search_types_hook($hook, $type, $returnvalue, $params) { * @param str $needle * @param str $before * @param str $after - * @param str $context + * @param int $context * @return str */ function search_get_relevant_substring($haystack, $needle, $before = '', $after = '', $context = 75) { @@ -181,7 +239,7 @@ function search_get_relevant_substring($haystack, $needle, $before = '', $after } // add elipses to end. - if ($start_pos + $context < strlen($haystack)) { + if ($pos + strlen($needle) + $context*2 < strlen($haystack)) { $matched = "$matched..."; } @@ -194,7 +252,15 @@ function search_get_relevant_substring($haystack, $needle, $before = '', $after } - +/** + * Passes entities, count, and original params to the view functions for + * search type. + * + * @param array $entities + * @param int $count + * @param array $params + * @return string + */ function search_get_listing_html($entities, $count, $params) { if (!is_array($entities) || !$count) { return FALSE; @@ -235,6 +301,58 @@ function search_get_listing_html($entities, $count, $params) { return FALSE; } +/** + * Returns a where clause for a search query. 
+ * + * @param str $table Prefix for table to search on + * @param array $fields Fields to match against + * @param array $params Original search params + * @return str + */ +function search_get_where_sql($table, $fields, $params) { + global $CONFIG; + $query = $params['query']; + + // add the table prefix to the fields + foreach ($fields as $i => $field) { + $fields[$i] = "$table.$field"; + } + + // if query is shorter than the min for fts words + // it's likely a single acronym or similar + // switch to literal mode + if (strlen($query) < $CONFIG->search_info['min_chars']) { + $likes = array(); + foreach ($fields as $field) { + $likes[] = "$field LIKE '%$query%'"; + } + $likes_str = implode(' OR ', $likes); + $where = "($table.guid = e.guid AND ($likes_str))"; + } else { + // if using advanced or paired "s, switch into boolean mode + if ((isset($params['advanced_search']) && $params['advanced_search']) || substr_count($query, '"') >= 2 ) { + $options = 'IN BOOLEAN MODE'; + } else { + $options = 'IN NATURAL LANGUAGE MODE'; + } + + // if short query, use query expansion. + if (strlen($query) < 6) { + $options .= ' WITH QUERY EXPANSION'; + } + // if query is shorter than the ft_min_word_len switch to literal mode. + $fields_str = implode(',', $fields); + $where = "($table.guid = e.guid AND (MATCH ($fields_str) AGAINST ('$query' $options)))"; + } + + return $where; +} + +function search_get_query_where_sql($table, $query) { + // if there are multiple "s or 's it's a literal string. + +} + /** Register init system event **/ register_elgg_event_handler('init','system','search_init');
\ No newline at end of file diff --git a/mod/search/views/default/page_elements/searchbox.php b/mod/search/views/default/page_elements/searchbox.php index 4bab36be9..cfc0b953b 100644 --- a/mod/search/views/default/page_elements/searchbox.php +++ b/mod/search/views/default/page_elements/searchbox.php @@ -1,4 +1,4 @@ <form id="searchform" action="<?php echo $vars['url']; ?>pg/search/" method="get"> - <input type="text" size="21" name="tag" value="<?php echo elgg_echo('search'); ?>" onclick="if (this.value=='<?php echo elgg_echo('search'); ?>') { this.value='' }" class="search_input" /> + <input type="text" size="21" name="q" value="<?php echo elgg_echo('search'); ?>" onclick="if (this.value=='<?php echo elgg_echo('search'); ?>') { this.value='' }" class="search_input" /> <input type="submit" value="<?php echo elgg_echo('search:go'); ?>" class="search_submit_button" /> </form> diff --git a/mod/search/views/default/search/listing.php b/mod/search/views/default/search/listing.php index 2ed657547..270e33267 100644 --- a/mod/search/views/default/search/listing.php +++ b/mod/search/views/default/search/listing.php @@ -13,7 +13,7 @@ <?php $entities = $vars['entities']; -$count = $vars['count']; +$count = $vars['count'] - count($vars['entities']); if (!is_array($vars['entities']) || !count($vars['entities'])) { return FALSE; @@ -22,6 +22,17 @@ if (!is_array($vars['entities']) || !count($vars['entities'])) { $title_str = elgg_echo("item:{$vars['params']['type']}:{$vars['params']['subtype']}"); $body = elgg_view_title($title_str); +$query = htmlspecialchars(http_build_query( + array( + 'q' => $vars['params']['query'], + 'type' => $vars['params']['type'], + 'subtype' => $vars['params']['subtype'] + ) +)); + +$url = "{$vars['url']}pg/search?$query"; +$more = "<a href=\"$url\">+$count more $title_str</a>"; + echo elgg_view('page_elements/contentwrapper', array('body' => $body)); foreach ($entities as $entity) { @@ -44,7 +55,7 @@ foreach ($entities as $entity) { <h3 
class="searchTitle">$title</h3> <span class="searchDetails"> <span class="searchDescription">$description</span><br /> - $icon - $time - <a href="">More $title_str</a> - + $icon $time - $more</a> </span> </span> ___END; |