aboutsummaryrefslogtreecommitdiff
path: root/mod/search
diff options
context:
space:
mode:
authorbrettp <brettp@36083f99-b078-4883-b0ff-0f9b5a30f544>2009-11-07 20:57:32 +0000
committerbrettp <brettp@36083f99-b078-4883-b0ff-0f9b5a30f544>2009-11-07 20:57:32 +0000
commit24e3ff747614364d0d44fc1a7644f164146c66e1 (patch)
treecab9495aa3a6daf5c1aa3bac6de61e3436e77380 /mod/search
parent413ea817c78a2cd7cbe3bebad1623daaaeea6394 (diff)
downloadelgg-24e3ff747614364d0d44fc1a7644f164146c66e1.tar.gz
elgg-24e3ff747614364d0d44fc1a7644f164146c66e1.tar.bz2
Moved default search hooks into search mod.
Using MySQL's MATCH ... AGAINST instead of likes for most searches. Changed 'tag' to 'q' while maintaining backward compatibility. git-svn-id: http://code.elgg.org/elgg/trunk@3633 36083f99-b078-4883-b0ff-0f9b5a30f544
Diffstat (limited to 'mod/search')
-rw-r--r--mod/search/index.php6
-rw-r--r--mod/search/search_hooks.php207
-rw-r--r--mod/search/start.php300
-rw-r--r--mod/search/views/default/page_elements/searchbox.php2
-rw-r--r--mod/search/views/default/search/listing.php15
5 files changed, 434 insertions, 96 deletions
diff --git a/mod/search/index.php b/mod/search/index.php
index 30f33f9f1..f9a96a60b 100644
--- a/mod/search/index.php
+++ b/mod/search/index.php
@@ -2,7 +2,9 @@
// $search_type == all || entities || trigger plugin hook
$search_type = get_input('search_type', 'all');
-$query = sanitise_string(get_input('query', get_input('tag')));
+
+// @todo there is a bug in get_input that makes variables have slashes sometimes.
+$query = sanitise_string(stripslashes(get_input('q', get_input('tag', '', FALSE), FALSE)));
// get limit and offset. override if on search dashboard, where only 2
// of each most recent entity types will be shown.
@@ -117,7 +119,7 @@ call search
*/
-$layout = elgg_view_layout('single_column', '', $results_html);
+$layout = elgg_view_layout('two_column_left_sidebar', '', $results_html);
page_draw($title, $layout);
diff --git a/mod/search/search_hooks.php b/mod/search/search_hooks.php
new file mode 100644
index 000000000..f673b7512
--- /dev/null
+++ b/mod/search/search_hooks.php
@@ -0,0 +1,207 @@
+<?php
+/**
+ * Elgg core search.
+ *
+ * @package Elgg
+ * @subpackage Core
+ * @author Curverider Ltd <info@elgg.com>, The MITRE Corporation <http://www.mitre.org>
+ * @link http://elgg.org/
+ */
+
+/**
+ * Return default results for searches on objects.
+ *
+ * Matches the query against the title and description columns of the
+ * objects_entity table, then attaches highlighted excerpts of both fields
+ * to every returned entity as volatile data.
+ *
+ * @param string $hook   Name of the plugin hook (not used in the body)
+ * @param string $type   Type of the plugin hook (not used in the body)
+ * @param mixed  $value  Current hook return value (not used in the body)
+ * @param array  $params Search params. 'query' is read; 'joins', 'wheres' and
+ *                       'order_by' are overwritten before the array is handed
+ *                       to elgg_get_entities().
+ * @return array array('entities' => array, 'count' => mixed)
+ */
+function search_objects_hook($hook, $type, $value, $params) {
+    global $CONFIG;
+
+    $query = $params['query'];
+
+    $params['joins'] = array("JOIN {$CONFIG->dbprefix}objects_entity oe ON e.guid = oe.guid");
+    $params['wheres'] = array(search_get_where_sql('oe', array('title', 'description'), $params));
+
+    //@todo allow sorting by recent time
+    $params['order_by'] = NULL;
+
+    // count first so a search with no hits costs one query instead of two.
+    $count_params = $params;
+    $count_params['count'] = TRUE;
+    $count = elgg_get_entities($count_params);
+
+    // no need to continue if nothing here.
+    if (!$count) {
+        return array('entities' => array(), 'count' => $count);
+    }
+
+    $entities = elgg_get_entities($params);
+    if (!is_array($entities)) {
+        // keep the foreach below and the callers safe from a non-array result.
+        $entities = array();
+    }
+
+    // add the volatile data for why these entities have been returned.
+    foreach ($entities as $entity) {
+        $title = search_get_relevant_substring($entity->title, $query, '<strong class="searchMatch">', '</strong>');
+        $entity->setVolatileData('search_matched_title', $title);
+
+        $desc = search_get_relevant_substring($entity->description, $query, '<strong class="searchMatch">', '</strong>');
+        $entity->setVolatileData('search_matched_description', $desc);
+    }
+
+    return array(
+        'entities' => $entities,
+        'count' => $count,
+    );
+}
+
+
+/**
+ * Return default results for searches on groups.
+ *
+ * Matches the query against the name and description columns of the
+ * groups_entity table, then attaches highlighted excerpts of both fields
+ * to every returned entity as volatile data.
+ *
+ * @param string $hook   Name of the plugin hook (not used in the body)
+ * @param string $type   Type of the plugin hook (not used in the body)
+ * @param mixed  $value  Current hook return value (not used in the body)
+ * @param array  $params Search params. 'query' is read; 'joins', 'wheres' and
+ *                       'order_by' are overwritten before the array is handed
+ *                       to elgg_get_entities().
+ * @return array array('entities' => array, 'count' => mixed)
+ */
+function search_groups_hook($hook, $type, $value, $params) {
+    global $CONFIG;
+
+    $query = $params['query'];
+
+    $params['joins'] = array("JOIN {$CONFIG->dbprefix}groups_entity ge ON e.guid = ge.guid");
+    $params['wheres'] = array(search_get_where_sql('ge', array('name', 'description'), $params));
+
+    //@todo allow sorting by recent time
+    $params['order_by'] = NULL;
+
+    // count first so a search with no hits costs one query instead of two.
+    $count_params = $params;
+    $count_params['count'] = TRUE;
+    $count = elgg_get_entities($count_params);
+
+    // no need to continue if nothing here.
+    if (!$count) {
+        return array('entities' => array(), 'count' => $count);
+    }
+
+    $entities = elgg_get_entities($params);
+    if (!is_array($entities)) {
+        // keep the foreach below and the callers safe from a non-array result.
+        $entities = array();
+    }
+
+    // add the volatile data for why these entities have been returned.
+    // the name goes under the title key and the description under the
+    // description key, mirroring what search_objects_hook() does.
+    foreach ($entities as $entity) {
+        $name = search_get_relevant_substring($entity->name, $query, '<strong class="searchMatch">', '</strong>');
+        $entity->setVolatileData('search_matched_title', $name);
+
+        $description = search_get_relevant_substring($entity->description, $query, '<strong class="searchMatch">', '</strong>');
+        $entity->setVolatileData('search_matched_description', $description);
+    }
+
+    return array(
+        'entities' => $entities,
+        'count' => $count,
+    );
+}
+
+
+/**
+ * Return default results for searches on users.
+ *
+ * Runs a LIKE match of the query against the username and name columns of
+ * the users_entity table, then attaches highlighted excerpts of both fields
+ * to every returned entity as volatile data.
+ *
+ * The query is interpolated into the SQL string as-is, so it must already be
+ * escaped for use inside a quoted SQL literal when it reaches this hook.
+ *
+ * @param string $hook   Name of the plugin hook (not used in the body)
+ * @param string $type   Type of the plugin hook (not used in the body)
+ * @param mixed  $value  Current hook return value (not used in the body)
+ * @param array  $params Search params. 'query' is read; 'joins' and 'wheres'
+ *                       are overwritten before the array is handed to
+ *                       elgg_get_entities().
+ * @return array array('entities' => array, 'count' => mixed)
+ */
+function search_users_hook($hook, $type, $value, $params) {
+    global $CONFIG;
+
+    $query = $params['query'];
+
+    $params['joins'] = array("JOIN {$CONFIG->dbprefix}users_entity ue ON e.guid = ue.guid");
+
+    // use like here because of the simplicity of the search.
+    // escape the LIKE wildcards so a % or _ typed by the user is matched
+    // literally instead of matching every row.
+    $like = addcslashes($query, '%_');
+    $where = "(ue.guid = e.guid AND (ue.username LIKE '%$like%' OR ue.name LIKE '%$like%'))";
+    $params['wheres'] = array($where);
+
+    // count first so a search with no hits costs one query instead of two.
+    $count_params = $params;
+    $count_params['count'] = TRUE;
+    $count = elgg_get_entities($count_params);
+
+    // no need to continue if nothing here.
+    if (!$count) {
+        return array('entities' => array(), 'count' => $count);
+    }
+
+    $entities = elgg_get_entities($params);
+    if (!is_array($entities)) {
+        // keep the foreach below and the callers safe from a non-array result.
+        $entities = array();
+    }
+
+    // add the volatile data for why these entities have been returned.
+    foreach ($entities as $entity) {
+        $username = search_get_relevant_substring($entity->username, $query, '<strong class="searchMatch">', '</strong>');
+        $entity->setVolatileData('search_matched_title', $username);
+
+        $name = search_get_relevant_substring($entity->name, $query, '<strong class="searchMatch">', '</strong>');
+        $entity->setVolatileData('search_matched_description', $name);
+    }
+
+    return array(
+        'entities' => $entities,
+        'count' => $count,
+    );
+}
+
+
+/**
+ * Return default results for searches on tags.
+ *
+ * Looks up entities whose 'tags' metadata equals the query
+ * (case-insensitively) and attaches a highlighted, comma-joined tag string
+ * to every returned entity as volatile data.
+ *
+ * @param string $hook   Name of the plugin hook (not used in the body)
+ * @param string $type   Type of the plugin hook (not used in the body)
+ * @param mixed  $value  Current hook return value (not used in the body)
+ * @param array  $params Search params. 'query' is read and
+ *                       'metadata_name_value_pair' is overwritten before the
+ *                       array is handed to elgg_get_entities_from_metadata().
+ * @return array array('entities' => array, 'count' => mixed)
+ */
+function search_tags_hook($hook, $type, $value, $params) {
+    $query = $params['query'];
+    $params['metadata_name_value_pair'] = array ('name' => 'tags', 'value' => $query, 'case_sensitive' => FALSE);
+
+    // count first so a search with no hits costs one query instead of two.
+    $count_params = $params;
+    $count_params['count'] = TRUE;
+    $count = elgg_get_entities_from_metadata($count_params);
+
+    // no need to continue if nothing here.
+    if (!$count) {
+        return array('entities' => array(), 'count' => $count);
+    }
+
+    $entities = elgg_get_entities_from_metadata($params);
+    if (!is_array($entities)) {
+        // keep the foreach below and the callers safe from a non-array result.
+        $entities = array();
+    }
+
+    // add the volatile data for why these entities have been returned.
+    foreach ($entities as $entity) {
+        // implode() needs an array; fall back to a plain string cast when
+        // the tags value is not one.
+        // NOTE(review): presumably a single tag comes back as a scalar - confirm.
+        $tags = $entity->tags;
+        $tags = is_array($tags) ? implode(',', $tags) : (string) $tags;
+
+        $tags_str = search_get_relevant_substring($tags, $query, '<strong class="searchMatch">', '</strong>');
+        $entity->setVolatileData('search_matched_tags', $tags_str);
+    }
+
+    return array(
+        'entities' => $entities,
+        'count' => $count,
+    );
+}
+
+
+/**
+ * Register tags as a custom search type.
+ *
+ * Appends 'tags' to the list of custom search types carried in $value.
+ *
+ * @param string $hook   Name of the plugin hook (not used in the body)
+ * @param string $type   Type of the plugin hook (not used in the body)
+ * @param mixed  $value  Current list of custom search types; anything that
+ *                       is not an array is treated as an empty list
+ * @param array  $params Hook params (not used in the body)
+ * @return array The list of custom search types including 'tags'
+ */
+function search_custom_types_tags_hook($hook, $type, $value, $params) {
+    // start from a clean list if no earlier handler supplied one.
+    if (!is_array($value)) {
+        $value = array();
+    }
+
+    // avoid listing the type twice if it is already present.
+    if (!in_array('tags', $value, TRUE)) {
+        $value[] = 'tags';
+    }
+
+    return $value;
+}
+
+
diff --git a/mod/search/start.php b/mod/search/start.php
index 4bd342285..47405450a 100644
--- a/mod/search/start.php
+++ b/mod/search/start.php
@@ -1,5 +1,4 @@
<?php
-
/**
* Elgg core search.
*
@@ -14,9 +13,31 @@
*
*/
function search_init() {
+    global $CONFIG;
+    // resolve the hooks file relative to this file instead of relying on
+    // the include_path lookup.
+    require_once dirname(__FILE__) . '/search_hooks.php';
+
// page handler for search actions and results
register_page_handler('search','search_page_handler');
+    // register some default search hooks
+    register_plugin_hook('search', 'object', 'search_objects_hook');
+    register_plugin_hook('search', 'user', 'search_users_hook');
+
+    // @todo pull this out into groups
+    register_plugin_hook('search', 'group', 'search_groups_hook');
+
+    // tags are a bit different.
+    // register a custom search type and a hook for that.
+    register_plugin_hook('search_types', 'get_types', 'search_custom_types_tags_hook');
+    register_plugin_hook('search', 'tags', 'search_tags_hook');
+
+    // get server min and max allowed chars for ft searching
+    $word_lens = get_data('SELECT @@ft_min_word_len as min, @@ft_max_word_len as max');
+
+    // start from MySQL's documented defaults (4 and 84) so search_info is
+    // always populated, then override with the server values if the lookup
+    // returned a row.
+    $CONFIG->search_info = array('min_chars' => 4, 'max_chars' => 84);
+    if (is_array($word_lens) && isset($word_lens[0])) {
+        $CONFIG->search_info['min_chars'] = $word_lens[0]->min;
+        $CONFIG->search_info['max_chars'] = $word_lens[0]->max;
+    }
+
// add in CSS for search elements
extend_view('css', 'search/css');
}
@@ -29,114 +50,151 @@ function search_init() {
function search_page_handler($page) {
global $CONFIG;
- if(!get_input('tag')) {
- set_input('tag', $page[0]);
+ // if there is no q set, we're being called from a legacy installation
+ // it expects a search by tags.
+ // actually it doesn't, but maybe it should.
+ // maintain backward compatibility
+ if(!get_input('q', get_input('tag', NULL))) {
+ set_input('q', $page[0]);
+ //set_input('search_type', 'tags');
}
- include_once($CONFIG->path . "mod/search/index.php");
+ include_once('index.php');
}
/**
- * Core search hook.
- * Returns an object with two parts:
- * ->entities: an array of instantiated entities that have been decorated with
- * volatile "search" data indicating what they matched. These are
- * the entities to be displayed to the user on this page.
- * ->total: total number of entities overall. This function can update this
- * limit to ask for more pages in the pagination.
+ * Return a string with highlighted matched elements.
+ * Checks for double quotes ("s).
+ * Provides context for matched elements.
+ * Will not return more than $max_length of full context.
+ * Only highlights words
+ *
+ * @param string $haystack
+ * @param string $needle
+ * @param int $min_match_context
+ * @param int $max_length
+ * @return unknown_type
*/
-function search_original_hook($hook, $type, $returnvalue, $params) {
- global $CONFIG;
+function search_get_highlighted_relevant_substrings($haystack, $needle, $min_match_context = 15, $max_length = 250) {
+ $haystack = strip_tags($haystack);
+ $haystack_lc = strtolower($haystack);
+
+ // for now don't worry about "s or boolean operators
+ $needle = str_replace(array('"', '-', '+', '~'), '', stripslashes(strip_tags($needle)));
+ $words = explode(' ', $needle);
+
+ $min_chars = $CONFIG->search_info['min_chars'];
+ // if > ft_min_word == not running in literal mode.
+ if ($needle >= $min_chars) {
+ // clean out any words that are ignored by mysql
+ foreach ($words as $i => $word) {
+ if (strlen($word) < $min_chars) {
+ unset ($words[$i]);
+ }
+ }
+ }
- var_dump($CONFIG->hooks);
-
- $tag = $params['tag'];
- $offset = $params['offset']; // starting page
- $limit = $params['limit']; // number per page
- $searchtype = $params['searchtype']; // the search type we're looking for
- $object_type = $params['object_type'];
- $subtype = $params['subtype'];
- $owner_guid = $params['owner_guid'];
- $tagtype = $params['tagtype'];
-
- $count = get_entities_from_metadata($tagtype, elgg_strtolower($tag), $object_type, $subtype, $owner_guid, $limit, $offset, "", 0, TRUE, FALSE);
- $ents = get_entities_from_metadata($tagtype, elgg_strtolower($tag), $object_type, $subtype, $owner_guid, $limit, $offset, "", 0, FALSE, FALSE);
-
-// $options = array(
-// 'metadata_name_value_pair' => array('name' => $params['tagtype'], 'value' => $params['tag'], 'case_sensitive' => false),
-// 'offset' => $params['offset'],
-// 'limit' => $params['limit'],
-// 'type' => $params['object_type'],
-// 'subtype' => $params['subtype'],
-// 'owner_guid' => $params['owner_guid']
-// );
-//
-// $count = elgg_get_entities_from_metadata(array_merge($options, array('count' => TRUE)));
-// $entities = elgg_get_entities_from_metadata($options);
-
- /*
- * Foreach entity
- * get the metadata keys
- * If the value matches, hang onto the key
- * add all the matched keys to VolatileData
- * This tells us *why* each entity matched
- */
- foreach ($ents as $ent) {
- $metadata = get_metadata_for_entity($ent->getGUID());
- $matched = array();
- if ($metadata) {
- foreach ($metadata as $tuple) {
- if ($tag === $tuple->value) {
- // This is one of the matching elements
- $matched[] = $tuple->name;
- }
+ $substr_counts = array();
+ $str_pos = array();
+ // get the full count of matches.
+ foreach ($words as $word) {
+ $word = strtolower($word);
+ $count = substr_count($haystack, $word);
+ $word_len = strlen($word);
+
+ // find the start positions for the words
+ // get the context for words based upon
+ if ($count > 1) {
+ $str_pos[$word] = array();
+ $offset = 0;
+ while (FALSE !== $pos = strpos($haystack, $word, $offset)) {
+ $str_pos[$word][] = $pos;
+ $offset += $pos + $word_len;
}
- $ent->setVolatileData('search', $matched);
+ } else {
+ $str_pos[$word] = array(strpos($haystack, $word));
}
+ $substr_counts[$word] = $count;
}
- // merge in our entities with any coming in from elsewhere
- $returnvalue->entities = array_merge($returnvalue->entities, $ents);
+//A test with multiple words and now more in the subject too because words need to be everywhere
- // expand the total entity count if necessary
- if ($count > $returnvalue->total) {
- $returnvalue->total = $count;
- }
+ // sort by order of occurrence
+ krsort($substr_counts);
+ $full_count = array_sum($substr_counts);
- return $returnvalue;
-}
-/**
- * Provides default search for registered entity subtypes.
- * Entity types should be dealt with in the entity classes. (Objects are an exception).
- *
- * @param unknown_type $hook
- * @param unknown_type $type
- * @param unknown_type $returnvalue
- * @param unknown_type $params
- * @return unknown_type
- */
-function search_registered_entities($hook, $type, $returnvalue, $params) {
- $entity_types = get_registered_entity_types();
- foreach ($entity_types as $type => $subtypes) {
- if (is_array($subtypes) && count($subtypes)) {
- }
+
+ // get full number of matches against all words to see how many we actually want to look at.
+
+
+
+
+// $desc = search_get_relevant_substring($entity->description, $params['query'], '<strong class="searchMatch">', '</strong>');
+
+
+ $params['query'];
+ // "this is"just a test "silly person"
+
+ // check for "s
+ $words_quotes = explode('"', $needle);
+
+ $words_orig = explode(' ', $needle);
+ $words = array();
+
+ foreach ($words_orig as $i => $word) {
+ // figure out if we have a special operand
+ $operand = substr($word, 0, 1);
+ switch($operand) {
+ case '"':
+ // find the matching " if any. else, remove the "
+ if (substr_count($query, '"') < 2) {
+ $words[] = substr($word, 1);
+ } else {
+ $word = substr($word, 1);
+ $word_i = $i;
+ while ('"' != strpos($words_orig[$word_i], '"')) {
+ $word .= " {$words_orig[$word_i]}";
+ unset($words_orig[$word_i]);
+ }
+
+
+ }
+
+ break;
+
+ case '+':
+ // remove +
+ $words[] = substr($word, 1);
+ break;
+
+ case '~':
+ case '-':
+ // remove this from highlighted list.
+
+ break;
+ }
}
-}
-/**
- * return our base search types (right now, we have none)
- */
-function search_base_search_types_hook($hook, $type, $returnvalue, $params) {
- if (!is_array($returnvalue)) {
- $returnvalue = array();
+ // pick out " queries
+ if (substr_count($query, '"') >= 2) {
+
}
- return $returnvalue;
-}
+ // ignore queries starting with -
+
+ // @todo figure out a way to "center" the matches within the max_length.
+ // if only one match, its context is $context + $max_length / 2
+ // if 2 matches, its context is $context + $max_length / 4
+ // if 3 matches, its context is $context + $max_length / 6
+ // $context per match = $min_match_context + ($max_length / $num_count_match)
+
+ // if $max_length / ($matched_count * 2) < $context
+ // only match against the first X matches where $context >= $context
+}
/**
* Returns a matching string with $context amount of context, optionally
@@ -148,7 +206,7 @@ function search_base_search_types_hook($hook, $type, $returnvalue, $params) {
* @param str $needle
* @param str $before
* @param str $after
- * @param str $context
+ * @param int $context
* @return str
*/
function search_get_relevant_substring($haystack, $needle, $before = '', $after = '', $context = 75) {
@@ -181,7 +239,7 @@ function search_get_relevant_substring($haystack, $needle, $before = '', $after
}
// add elipses to end.
- if ($start_pos + $context < strlen($haystack)) {
+ if ($pos + strlen($needle) + $context*2 < strlen($haystack)) {
$matched = "$matched...";
}
@@ -194,7 +252,15 @@ function search_get_relevant_substring($haystack, $needle, $before = '', $after
}
-
+/**
+ * Passes entities, count, and original params to the view functions for
+ * search type.
+ *
+ * @param array $entities
+ * @param int $count
+ * @param array $params
+ * @return string
+ */
function search_get_listing_html($entities, $count, $params) {
if (!is_array($entities) || !$count) {
return FALSE;
@@ -235,6 +301,58 @@ function search_get_listing_html($entities, $count, $params) {
return FALSE;
}
+/**
+ * Returns a where clause for a search query.
+ *
+ * Queries shorter than $CONFIG->search_info['min_chars'] are matched with
+ * LIKE against every field; longer queries use MATCH ... AGAINST.
+ *
+ * The query is interpolated into the SQL string as-is, so it must already be
+ * escaped for use inside a quoted SQL literal when it reaches this function.
+ *
+ * @param str $table Prefix for table to search on
+ * @param array $fields Fields to match against
+ * @param array $params Original search params ('query' is required,
+ *                      'advanced_search' is optional)
+ * @return str
+ */
+function search_get_where_sql($table, $fields, $params) {
+    global $CONFIG;
+    $query = $params['query'];
+
+    // add the table prefix to the fields
+    foreach ($fields as $i => $field) {
+        $fields[$i] = "$table.$field";
+    }
+
+    // if query is shorter than the min for fts words
+    // it's likely a single acronym or similar
+    // switch to literal mode
+    if (strlen($query) < $CONFIG->search_info['min_chars']) {
+        // escape the LIKE wildcards so a % or _ typed by the user is
+        // matched literally instead of matching every row.
+        $like = addcslashes($query, '%_');
+
+        $likes = array();
+        foreach ($fields as $field) {
+            $likes[] = "$field LIKE '%$like%'";
+        }
+        $likes_str = implode(' OR ', $likes);
+        $where = "($table.guid = e.guid AND ($likes_str))";
+    } else {
+        $advanced = isset($params['advanced_search']) && $params['advanced_search'];
+
+        // if using advanced or paired "s, switch into boolean mode
+        if ($advanced || substr_count($query, '"') >= 2) {
+            $options = 'IN BOOLEAN MODE';
+        } else {
+            $options = 'IN NATURAL LANGUAGE MODE';
+
+            // if short query, use query expansion.
+            // MySQL only accepts WITH QUERY EXPANSION on natural language
+            // searches; appending it to IN BOOLEAN MODE is a syntax error.
+            if (strlen($query) < 6) {
+                $options .= ' WITH QUERY EXPANSION';
+            }
+        }
+
+        $fields_str = implode(',', $fields);
+        $where = "($table.guid = e.guid AND (MATCH ($fields_str) AGAINST ('$query' $options)))";
+    }
+
+    return $where;
+}
+
+
+/**
+ * Placeholder with an empty body; it currently returns nothing.
+ *
+ * @param mixed $table Not used by the current body
+ * @param mixed $query Not used by the current body
+ * @return void
+ */
+function search_get_query_where_sql($table, $query) {
+ // if there are multiple "s or 's it's a literal string.
+
+}
+
/** Register init system event **/
register_elgg_event_handler('init','system','search_init'); \ No newline at end of file
diff --git a/mod/search/views/default/page_elements/searchbox.php b/mod/search/views/default/page_elements/searchbox.php
index 4bab36be9..cfc0b953b 100644
--- a/mod/search/views/default/page_elements/searchbox.php
+++ b/mod/search/views/default/page_elements/searchbox.php
@@ -1,4 +1,4 @@
<form id="searchform" action="<?php echo $vars['url']; ?>pg/search/" method="get">
- <input type="text" size="21" name="tag" value="<?php echo elgg_echo('search'); ?>" onclick="if (this.value=='<?php echo elgg_echo('search'); ?>') { this.value='' }" class="search_input" />
+ <input type="text" size="21" name="q" value="<?php echo elgg_echo('search'); ?>" onclick="if (this.value=='<?php echo elgg_echo('search'); ?>') { this.value='' }" class="search_input" />
<input type="submit" value="<?php echo elgg_echo('search:go'); ?>" class="search_submit_button" />
</form>
diff --git a/mod/search/views/default/search/listing.php b/mod/search/views/default/search/listing.php
index 2ed657547..270e33267 100644
--- a/mod/search/views/default/search/listing.php
+++ b/mod/search/views/default/search/listing.php
@@ -13,7 +13,7 @@
<?php
$entities = $vars['entities'];
-$count = $vars['count'];
+$count = $vars['count'] - count($vars['entities']);
if (!is_array($vars['entities']) || !count($vars['entities'])) {
return FALSE;
@@ -22,6 +22,17 @@ if (!is_array($vars['entities']) || !count($vars['entities'])) {
$title_str = elgg_echo("item:{$vars['params']['type']}:{$vars['params']['subtype']}");
$body = elgg_view_title($title_str);
+// query string for the full result list of this type/subtype.
+$query = htmlspecialchars(http_build_query(
+    array(
+        'q' => $vars['params']['query'],
+        'type' => $vars['params']['type'],
+        'subtype' => $vars['params']['subtype']
+    )
+));
+
+$url = "{$vars['url']}pg/search?$query";
+
+// $count holds the number of results not shown in this listing; only offer
+// the link when there is something left to show.
+$more = ($count > 0) ? "<a href=\"$url\">+$count more $title_str</a>" : '';
+
+
echo elgg_view('page_elements/contentwrapper', array('body' => $body));
foreach ($entities as $entity) {
@@ -44,7 +55,7 @@ foreach ($entities as $entity) {
<h3 class="searchTitle">$title</h3>
<span class="searchDetails">
<span class="searchDescription">$description</span><br />
- $icon - $time - <a href="">More $title_str</a> -
+ $icon $time - $more
</span>
</span>
___END;