aboutsummaryrefslogtreecommitdiff
path: root/mod/search/start.php
diff options
context:
space:
mode:
authorbrettp <brettp@36083f99-b078-4883-b0ff-0f9b5a30f544>2009-11-07 20:57:32 +0000
committerbrettp <brettp@36083f99-b078-4883-b0ff-0f9b5a30f544>2009-11-07 20:57:32 +0000
commit24e3ff747614364d0d44fc1a7644f164146c66e1 (patch)
treecab9495aa3a6daf5c1aa3bac6de61e3436e77380 /mod/search/start.php
parent413ea817c78a2cd7cbe3bebad1623daaaeea6394 (diff)
downloadelgg-24e3ff747614364d0d44fc1a7644f164146c66e1.tar.gz
elgg-24e3ff747614364d0d44fc1a7644f164146c66e1.tar.bz2
Moved default search hooks into search mod.
Using MySQL's MATCH ... AGAINST instead of likes for most searches. Changed 'tag' to 'q' while maintaining backward compatibility. git-svn-id: http://code.elgg.org/elgg/trunk@3633 36083f99-b078-4883-b0ff-0f9b5a30f544
Diffstat (limited to 'mod/search/start.php')
-rw-r--r--mod/search/start.php300
1 files changed, 209 insertions, 91 deletions
diff --git a/mod/search/start.php b/mod/search/start.php
index 4bd342285..47405450a 100644
--- a/mod/search/start.php
+++ b/mod/search/start.php
@@ -1,5 +1,4 @@
<?php
-
/**
* Elgg core search.
*
@@ -14,9 +13,31 @@
*
*/
function search_init() {
+ global $CONFIG;
+ require_once 'search_hooks.php';
+
// page handler for search actions and results
register_page_handler('search','search_page_handler');
+ // register some default search hooks
+ register_plugin_hook('search', 'object', 'search_objects_hook');
+ register_plugin_hook('search', 'user', 'search_users_hook');
+
+ // @todo pull this out into groups
+ register_plugin_hook('search', 'group', 'search_groups_hook');
+
+ // tags are a bit different.
+ // register a custom search type and a hook for that.
+ register_plugin_hook('search_types', 'get_types', 'search_custom_types_tags_hook');
+ register_plugin_hook('search', 'tags', 'search_tags_hook');
+
+ // get server min and max allowed chars for ft searching
+ $word_lens = get_data('SELECT @@ft_min_word_len as min, @@ft_max_word_len as max');
+
+ $CONFIG->search_info = array();
+ $CONFIG->search_info['min_chars'] = $word_lens[0]->min;
+ $CONFIG->search_info['max_chars'] = $word_lens[0]->max;
+
// add in CSS for search elements
extend_view('css', 'search/css');
}
@@ -29,114 +50,151 @@ function search_init() {
function search_page_handler($page) {
global $CONFIG;
- if(!get_input('tag')) {
- set_input('tag', $page[0]);
+ // if there is no q set, we're being called from a legacy installation
+ // it expects a search by tags.
+ // actually it doesn't, but maybe it should.
+ // maintain backward compatibility
+ if(!get_input('q', get_input('tag', NULL))) {
+ set_input('q', $page[0]);
+ //set_input('search_type', 'tags');
}
- include_once($CONFIG->path . "mod/search/index.php");
+ include_once('index.php');
}
/**
- * Core search hook.
- * Returns an object with two parts:
- * ->entities: an array of instantiated entities that have been decorated with
- * volatile "search" data indicating what they matched. These are
- * the entities to be displayed to the user on this page.
- * ->total: total number of entities overall. This function can update this
- * limit to ask for more pages in the pagination.
+ * Return a string with highlighted matched elements.
+ * Checks for "s
+ * Provides context for matched elements.
+ * Will not return more than $max_length of full context.
+ * Only highlights words
+ *
+ * @param unknown_type $haystack
+ * @param unknown_type $needle
+ * @param unknown_type $min_match_context
+ * @param unknown_type $max_length
+ * @return unknown_type
*/
-function search_original_hook($hook, $type, $returnvalue, $params) {
- global $CONFIG;
+function search_get_highlighted_relevant_substrings($haystack, $needle, $min_match_context = 15, $max_length = 250) {
+ $haystack = strip_tags($haystack);
+ $haystack_lc = strtolower($haystack);
+
+ // for now don't worry about "s or boolean operators
+ $needle = str_replace(array('"', '-', '+', '~'), '', stripslashes(strip_tags($needle)));
+ $words = explode(' ', $needle);
+
+ $min_chars = $CONFIG->search_info['min_chars'];
+ // if > ft_min_word == not running in literal mode.
+ if ($needle >= $min_chars) {
+ // clean out any words that are ignored by mysql
+ foreach ($words as $i => $word) {
+ if (strlen($word) < $min_chars) {
+ unset ($words[$i]);
+ }
+ }
+ }
- var_dump($CONFIG->hooks);
-
- $tag = $params['tag'];
- $offset = $params['offset']; // starting page
- $limit = $params['limit']; // number per page
- $searchtype = $params['searchtype']; // the search type we're looking for
- $object_type = $params['object_type'];
- $subtype = $params['subtype'];
- $owner_guid = $params['owner_guid'];
- $tagtype = $params['tagtype'];
-
- $count = get_entities_from_metadata($tagtype, elgg_strtolower($tag), $object_type, $subtype, $owner_guid, $limit, $offset, "", 0, TRUE, FALSE);
- $ents = get_entities_from_metadata($tagtype, elgg_strtolower($tag), $object_type, $subtype, $owner_guid, $limit, $offset, "", 0, FALSE, FALSE);
-
-// $options = array(
-// 'metadata_name_value_pair' => array('name' => $params['tagtype'], 'value' => $params['tag'], 'case_sensitive' => false),
-// 'offset' => $params['offset'],
-// 'limit' => $params['limit'],
-// 'type' => $params['object_type'],
-// 'subtype' => $params['subtype'],
-// 'owner_guid' => $params['owner_guid']
-// );
-//
-// $count = elgg_get_entities_from_metadata(array_merge($options, array('count' => TRUE)));
-// $entities = elgg_get_entities_from_metadata($options);
-
- /*
- * Foreach entity
- * get the metadata keys
- * If the value matches, hang onto the key
- * add all the matched keys to VolatileData
- * This tells us *why* each entity matched
- */
- foreach ($ents as $ent) {
- $metadata = get_metadata_for_entity($ent->getGUID());
- $matched = array();
- if ($metadata) {
- foreach ($metadata as $tuple) {
- if ($tag === $tuple->value) {
- // This is one of the matching elements
- $matched[] = $tuple->name;
- }
+ $substr_counts = array();
+ $str_pos = array();
+ // get the full count of matches.
+ foreach ($words as $word) {
+ $word = strtolower($word);
+ $count = substr_count($haystack, $word);
+ $word_len = strlen($word);
+
+ // find the start positions for the words
+ // get the context for words based upon
+ if ($count > 1) {
+ $str_pos[$word] = array();
+ $offset = 0;
+ while (FALSE !== $pos = strpos($haystack, $word, $offset)) {
+ $str_pos[$word][] = $pos;
+ $offset += $pos + $word_len;
}
- $ent->setVolatileData('search', $matched);
+ } else {
+ $str_pos[$word] = array(strpos($haystack, $word));
}
+ $substr_counts[$word] = $count;
}
- // merge in our entities with any coming in from elsewhere
- $returnvalue->entities = array_merge($returnvalue->entities, $ents);
+//A test with multiple words and now more in the subject too because words need to be everywhere
- // expand the total entity count if necessary
- if ($count > $returnvalue->total) {
- $returnvalue->total = $count;
- }
+ // sort by order of occurrence (NOTE: krsort() orders by key, i.e. the word, in reverse -- not by count)
+ krsort($substr_counts);
+ $full_count = array_sum($substr_counts);
- return $returnvalue;
-}
-/**
- * Provides default search for registered entity subtypes.
- * Entity types should be dealt with in the entity classes. (Objects are an exception).
- *
- * @param unknown_type $hook
- * @param unknown_type $type
- * @param unknown_type $returnvalue
- * @param unknown_type $params
- * @return unknown_type
- */
-function search_registered_entities($hook, $type, $returnvalue, $params) {
- $entity_types = get_registered_entity_types();
- foreach ($entity_types as $type => $subtypes) {
- if (is_array($subtypes) && count($subtypes)) {
- }
+
+ // get full number of matches against all words to see how many we actually want to look at.
+
+
+
+
+// $desc = search_get_relevant_substring($entity->description, $params['query'], '<strong class="searchMatch">', '</strong>');
+
+
+ $params['query'];
+ // "this is"just a test "silly person"
+
+ // check for "s
+ $words_quotes = explode('"', $needle);
+
+ $words_orig = explode(' ', $needle);
+ $words = array();
+
+ foreach ($words_orig as $i => $word) {
+ // figure out if we have a special operand
+ $operand = substr($word, 0, 1);
+ switch($operand) {
+ case '"':
+ // find the matching " if any. else, remove the "
+ if (substr_count($query, '"') < 2) {
+ $words[] = substr($word, 1);
+ } else {
+ $word = substr($word, 1);
+ $word_i = $i;
+ while ('"' != strpos($words_orig[$word_i], '"')) {
+ $word .= " {$words_orig[$word_i]}";
+ unset($words_orig[$word_i]);
+ }
+
+
+ }
+
+ break;
+
+ case '+':
+ // remove +
+ $words[] = substr($word, 1);
+ break;
+
+ case '~':
+ case '-':
+ // remove this from highlighted list.
+
+ break;
+ }
}
-}
-/**
- * return our base search types (right now, we have none)
- */
-function search_base_search_types_hook($hook, $type, $returnvalue, $params) {
- if (!is_array($returnvalue)) {
- $returnvalue = array();
+ // pick out " queries
+ if (substr_count($query, '"') >= 2) {
+
}
- return $returnvalue;
-}
+ // ignore queries starting with -
+
+ // @todo figure out a way to "center" the matches within the max_length.
+ // if only one match, its context is $context + $max_length / 2
+ // if 2 matches, its context is $context + $max_length / 4
+ // if 3 matches, its context is $context + $max_length / 6
+ // $context per match = $min_match_context + ($max_length / $num_count_match)
+
+ // if $max_length / ($matched_count * 2) < $context
+ // only match against the first X matches where $context >= $context
+}
/**
* Returns a matching string with $context amount of context, optionally
@@ -148,7 +206,7 @@ function search_base_search_types_hook($hook, $type, $returnvalue, $params) {
* @param str $needle
* @param str $before
* @param str $after
- * @param str $context
+ * @param int $context
* @return str
*/
function search_get_relevant_substring($haystack, $needle, $before = '', $after = '', $context = 75) {
@@ -181,7 +239,7 @@ function search_get_relevant_substring($haystack, $needle, $before = '', $after
}
// add elipses to end.
- if ($start_pos + $context < strlen($haystack)) {
+ if ($pos + strlen($needle) + $context*2 < strlen($haystack)) {
$matched = "$matched...";
}
@@ -194,7 +252,15 @@ function search_get_relevant_substring($haystack, $needle, $before = '', $after
}
-
+/**
+ * Passes entities, count, and original params to the view functions for
+ * search type.
+ *
+ * @param array $entities
+ * @param int $count
+ * @param array $params
+ * @return string
+ */
function search_get_listing_html($entities, $count, $params) {
if (!is_array($entities) || !$count) {
return FALSE;
@@ -235,6 +301,58 @@ function search_get_listing_html($entities, $count, $params) {
return FALSE;
}
+/**
+ * Returns a where clause for a search query.
+ *
+ * @param str $table Prefix for table to search on
+ * @param array $fields Fields to match against
+ * @param array $params Original search params
+ * @return str
+ */
+function search_get_where_sql($table, $fields, $params) {
+ global $CONFIG;
+ $query = $params['query'];
+
+ // add the table prefix to the fields
+ foreach ($fields as $i => $field) {
+ $fields[$i] = "$table.$field";
+ }
+
+ // if query is shorter than the min for fts words
+ // it's likely a single acronym or similar
+ // switch to literal mode
+ if (strlen($query) < $CONFIG->search_info['min_chars']) {
+ $likes = array();
+ foreach ($fields as $field) {
+ $likes[] = "$field LIKE '%$query%'";
+ }
+ $likes_str = implode(' OR ', $likes);
+ $where = "($table.guid = e.guid AND ($likes_str))";
+ } else {
+ // if using advanced or paired "s, switch into boolean mode
+ if ((isset($params['advanced_search']) && $params['advanced_search']) || substr_count($query, '"') >= 2 ) {
+ $options = 'IN BOOLEAN MODE';
+ } else {
+ $options = 'IN NATURAL LANGUAGE MODE';
+ }
+
+ // if short query, use query expansion.
+ if (strlen($query) < 6) {
+ $options .= ' WITH QUERY EXPANSION';
+ }
+ // if query is shorter than the ft_min_word_len switch to literal mode.
+ $fields_str = implode(',', $fields);
+ $where = "($table.guid = e.guid AND (MATCH ($fields_str) AGAINST ('$query' $options)))";
+ }
+
+ return $where;
+}
+
+function search_get_query_where_sql($table, $query) {
+ // if there are multiple "s or 's it's a literal string.
+
+}
+
/** Register init system event **/
register_elgg_event_handler('init','system','search_init'); \ No newline at end of file