aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbrettp <brettp@36083f99-b078-4883-b0ff-0f9b5a30f544>2009-11-15 04:27:19 +0000
committerbrettp <brettp@36083f99-b078-4883-b0ff-0f9b5a30f544>2009-11-15 04:27:19 +0000
commita4df77a015527dea272d8b643e89a33f8929361b (patch)
tree903896a5c55e3337d02efea214c16ab2f7ce4b64
parent9fb9b8565d6a95df0264ef0054106df6b2d3943e (diff)
downloadelgg-a4df77a015527dea272d8b643e89a33f8929361b.tar.gz
elgg-a4df77a015527dea272d8b643e89a33f8929361b.tar.bz2
Search changes.
Fixes #1376: Only fetching ft_min_word_len if the query != false. Using IN BOOLEAN MODE for metadata search (comments, tags) to avoid a fulltext index on the metastrings table. Slower for search, faster for site. Simplified and modularized logic for pulling out relevant substrings and highlighting. Corrected the ellipsis oddities in relevancy substring concatenation. Added pagination on non-homepages. Added missing language strings. Updated and standardized comment results listings. Repeat query in searchbar. Dealing with comments on unavailable entities better (though not well). Increased default word context to 30 characters. Decreased default context max length to 300 characters. Promise to start making atomic commits real soon now. git-svn-id: http://code.elgg.org/elgg/trunk@3684 36083f99-b078-4883-b0ff-0f9b5a30f544
-rw-r--r--mod/search/index.php182
-rw-r--r--mod/search/languages/en.php22
-rw-r--r--mod/search/search_hooks.php40
-rw-r--r--mod/search/start.php480
-rw-r--r--mod/search/views/default/page_elements/searchbox.php14
-rw-r--r--mod/search/views/default/search/comments/listing.php70
-rw-r--r--mod/search/views/default/search/css.php3
-rw-r--r--mod/search/views/default/search/listing.php55
8 files changed, 351 insertions, 515 deletions
diff --git a/mod/search/index.php b/mod/search/index.php
index ee6da32a8..e7081ecb5 100644
--- a/mod/search/index.php
+++ b/mod/search/index.php
@@ -27,11 +27,9 @@ $params = array(
// 'tag_type' => $tag_type,
'owner_guid' => $owner_guid,
// 'friends' => $friends
+ 'pagination' => ($search_type == 'all') ? FALSE : TRUE
);
-$results_html = '';
-//$results_html .= elgg_view_title(elgg_echo('search:results')) . "<input type=\"text\" value=\"$query\" />";
-$results_html .= elgg_view_title(elgg_echo('search:results'));
$types = get_registered_entity_types();
$custom_types = trigger_plugin_hook('search_types', 'get_types', $params, array());
@@ -105,13 +103,18 @@ foreach ($custom_types as $type) {
// check that we have an actual query
if (!$query) {
- $body .= "No query.";
+ $body = elgg_view_title(elgg_echo('search:search_error'));
+ $body .= elgg_view('page_elements/contentwrapper', array('body' => elgg_echo('search:no_query')));
+
$layout = elgg_view_layout('two_column_left_sidebar', '', $body);
page_draw($title, $layout);
return;
}
+// start the actual search
+$results_html = '';
+
if ($search_type == 'all' || $search_type == 'entities') {
// to pass the correct search type to the views
$params['search_type'] = 'entities';
@@ -199,177 +202,18 @@ if ($search_type != 'entities' || $search_type == 'all') {
}
}
-//if ($search_type !== 'all') {
-// var_dump('here');
-// $entities = trigger_plugin_hook('search', $search_type, '', $return);
-//}
-/*
+// highlight search terms
+$searched_words = search_remove_ignored_words($query, 'array');
+$highlighted_query = search_highlight_words($searched_words, $query);
-call search_section_start to display long bar with types and titles
-call search
-
-*/
+$body = elgg_view_title(sprintf(elgg_echo('search:results'), "\"$highlighted_query\""));
if (!$results_html) {
- $body = elgg_echo('search:no_results');
+ $body .= elgg_view('page_elements/contentwrapper', array('body' => elgg_echo('search:no_results')));
} else {
- $body = $results_html;
+ $body .= $results_html;
}
$layout = elgg_view_layout('two_column_left_sidebar', '', $body);
page_draw($title, $layout);
-
-
-
-
-
-
-
-return;
-
-
-/** Main search page */
-
-global $CONFIG;
-
-$tag = get_input('tag');
-$offset = get_input('offset', 0);
-$viewtype = get_input('search_viewtype','list');
-if ($viewtype == 'gallery') {
- $limit = get_input('limit', 12); // 10 items in list view
-} else {
- $limit = get_input('limit', 10); // 12 items in gallery view
-}
-$searchtype = get_input('searchtype', 'all');
-$type = get_input('type', '');
-$subtype = get_input('subtype', '');
-$owner_guid = get_input('owner_guid', '');
-$tagtype = get_input('tagtype', '');
-$friends = (int)get_input('friends', 0);
-$title = sprintf(elgg_echo('searchtitle'), $tag);
-
-if (substr_count($owner_guid, ',')) {
- $owner_guid_array = explode(',', $owner_guid);
-} else {
- $owner_guid_array = $owner_guid;
-}
-if ($friends > 0) {
- if ($friends = get_user_friends($friends, '', 9999)) {
- $owner_guid_array = array();
- foreach($friends as $friend) {
- $owner_guid_array[] = $friend->guid;
- }
- } else {
- $owner_guid = -1;
- }
-}
-
-// Set up submenus
-if ($types = get_registered_entity_types()) {
- foreach($types as $ot => $subtype_array) {
- if (is_array($subtype_array) && count($subtype_array)) {
- foreach($subtype_array as $object_subtype) {
- $label = 'item:' . $ot;
- if (!empty($object_subtype)) {
- $label .= ':' . $object_subtype;
- }
-
- $data = http_build_query(array(
- 'tag' => urlencode($tag),
- 'subtype' => $object_subtype,
- 'type' => urlencode($ot),
- //'tagtype' => urlencode($md_type),
- 'owner_guid' => urlencode($owner_guid)
- ));
-
- $url = "{$CONFIG->wwwroot}pg/search/?$data";
-
- add_submenu_item(elgg_echo($label), $url);
- }
- }
- }
-
- $data = http_build_query(array(
- 'tag' => urlencode($tag),
- 'owner_guid' => urlencode($owner_guid)
- ));
-
- add_submenu_item(elgg_echo('all'), "{$CONFIG->wwwroot}pg/search/?$data");
-}
-
-// pull in search types for external or aggregated searches.
-if ($search_types = trigger_plugin_hook('search', 'types', '', NULL, array())) {
-
-}
-
-$body = '';
-if (!empty($tag)) {
- // start with blank results.
- $results = array(
- 'entities' => array(),
- 'total' => 0
- );
-
- // do the actual searchts
- $params = array(
- 'tag' => $tag,
- 'offset' => $offset,
- 'limit' => $limit,
- 'searchtype' => $searchtype,
- 'type' => $type,
- 'subtype' => $subtype,
- 'tagtype' => $tagtype,
- 'owner_guid' => $owner_guid_array
- );
-
- $results = trigger_plugin_hook('search', 'entities', $params, $results);
-
- if (empty($type) && empty($subtype)) {
- $title = sprintf(elgg_echo('searchtitle'),$tag);
- } else {
- if (empty($type)) {
- $type = 'object';
- }
- $itemtitle = 'item:' . $type;
- if (!empty($subtype)) {
- $itemtitle .= ':' . $subtype;
- }
- $itemtitle = elgg_echo($itemtitle);
- $title = sprintf(elgg_echo('advancedsearchtitle'),$itemtitle,$tag);
- }
-
- $body .= elgg_view_title($title); // elgg_view_title(sprintf(elgg_echo('searchtitle'),$tag));
-
- // call the old (now-deprecated) search hook here
- $body .= trigger_plugin_hook('search','',$tag, '');
-
- $body .= elgg_view('search/startblurb', array('query' => $query));
-
- if ($results->total > 0) {
- $body .= elgg_view('search/entity_list', array(
- 'entities' => $results->entities,
- 'count' => $results->total,
- 'offset' => $offset,
- 'limit' => $limit,
- 'baseurl' => $_SERVER['REQUEST_URI'],
- 'fullview' => false,
- 'context' => 'search',
- 'viewtypetoggle' => true,
- 'viewtype' => $viewtype,
- 'pagination' => true
- ));
- } else {
- $body .= elgg_view('page_elements/contentwrapper', array('body' => elgg_echo('search:noresults')));
- }
-
- elgg_view_entity_list($results->entities, count($results->entities), 0, count($results->entities), false);
-} else {
- // if no tag was given, give the user a box to input a search term
- $body .= elgg_view_title(elgg_echo('search:enterterm'));
- $body .= elgg_view('page_elements/contentwrapper', array('body' => '<div>' . elgg_view('page_elements/searchbox') . '</div>'));
-}
-
-$layout = elgg_view_layout('two_column_left_sidebar','',$body);
-
-page_draw($title, $layout); \ No newline at end of file
diff --git a/mod/search/languages/en.php b/mod/search/languages/en.php
index 52c29c7ed..f601256d0 100644
--- a/mod/search/languages/en.php
+++ b/mod/search/languages/en.php
@@ -1,10 +1,20 @@
<?php
- $language_array = array('search:enterterm' => 'Enter a search term:',
- 'search:noresults' => 'No results.',
- 'search:matched' => 'Matched: '
- );
+$language_array = array(
+ 'search:enter_term' => 'Enter a search term:',
+ 'search:no_results' => 'No results.',
+ 'search:matched' => 'Matched: ',
+ 'search:results' => 'Results for %s',
+ 'search:no_query' => 'Please enter a query to search.',
+ 'search:search_error' => 'Error',
-add_translation('en', $language_array);
+ 'search:more' => '+%s more %s',
+
+ 'search_types:tags' => 'Tags',
-?>
+ 'search_types:comments' => 'Comments',
+ 'search:comment_on' => 'Comment on "%s"',
+ 'search:unavailable_entity' => 'Unavailable Entity',
+);
+
+add_translation('en', $language_array);
diff --git a/mod/search/search_hooks.php b/mod/search/search_hooks.php
index f7a49400f..8f0a62703 100644
--- a/mod/search/search_hooks.php
+++ b/mod/search/search_hooks.php
@@ -229,7 +229,10 @@ function search_comments_hook($hook, $type, $value, $params) {
);
$fields = array('string');
- $search_where = search_get_where_sql('msv', $fields, $params);
+
+ // force IN BOOLEAN MODE since fulltext isn't
+ // available on metastrings (and boolean mode doesn't need it)
+ $search_where = search_get_where_sql('msv', $fields, $params, FALSE);
$e_access = get_access_sql_suffix('e');
$a_access = get_access_sql_suffix('a');
@@ -245,17 +248,44 @@ function search_comments_hook($hook, $type, $value, $params) {
LIMIT {$params['offset']}, {$params['limit']}
";
+
$comments = get_data($q);
+//elgg_get_entities()
+ $q = "SELECT count(DISTINCT a.id) as total FROM {$CONFIG->dbprefix}annotations a
+ JOIN {$CONFIG->dbprefix}metastrings msn ON a.name_id = msn.id
+ JOIN {$CONFIG->dbprefix}metastrings msv ON a.value_id = msv.id
+ JOIN {$CONFIG->dbprefix}entities e ON a.entity_guid = e.guid
+ WHERE msn.string IN ('generic_comment', 'group_topic_post')
+ AND ($search_where)
+ AND $e_access
+ AND $a_access
+ ";
+
+ $result = get_data($q);
+ $count = $result[0]->total;
+ // @todo if plugins are disabled causing subtypes
+ // to be invalid and there are comments on entities of those subtypes,
+ // the counts will be wrong here and results might not show up correctly,
+ // especially on the search landing page, which only pulls out two results.
+
+ // probably better to check against valid subtypes than to do what I'm doing.
+
// need to return actual entities
// add the volatile data for why these entities have been returned.
$entities = array();
foreach ($comments as $comment) {
- $tags = implode(',', $entity->tags);
- if (!$entity = get_entity($comment->entity_guid)) {
- continue;
+ $entity = get_entity($comment->entity_guid);
+
+ // hic sunt dracones
+ if (!$entity) {
+ //continue;
+ $entity = new ElggObject();
+ $entity->setVolatileData('search_unavailable_entity', TRUE);
}
+
$comment_str = search_get_highlighted_relevant_substrings($comment->comment, $query);
+ $entity->setVolatileData('search_match_annotation_id', $comment->id);
$entity->setVolatileData('search_matched_comment', $comment_str);
$entity->setVolatileData('search_matched_comment_owner_guid', $comment->owner_guid);
$entity->setVolatileData('search_matched_comment_time_created', $comment->time_created);
@@ -264,7 +294,7 @@ function search_comments_hook($hook, $type, $value, $params) {
return array(
'entities' => $entities,
- 'count' => count($entities),
+ 'count' => $count,
);
}
diff --git a/mod/search/start.php b/mod/search/start.php
index 18b743cde..42366318a 100644
--- a/mod/search/start.php
+++ b/mod/search/start.php
@@ -40,7 +40,7 @@ function search_init() {
// can't use get_data() here because some servers don't have these globals set,
// which throws a db exception.
$r = mysql_query('SELECT @@ft_min_word_len as min, @@ft_max_word_len as max');
- if ($word_lens = mysql_fetch_assoc($r)) {
+ if ($r && ($word_lens = mysql_fetch_assoc($r))) {
$CONFIG->search_info['min_chars'] = $word_lens['min'];
$CONFIG->search_info['max_chars'] = $word_lens['max'];
} else {
@@ -74,11 +74,8 @@ function search_page_handler($page) {
}
/**
- * Return a string with highlighted matched elements.
- * Checks for "s
- * Provides context for matched elements.
- * Will not return more than $max_length of full context.
- * Only highlights words
+ * Return a string with highlighted matched queries and relevant context
+ * Determines context based upon occurrence and distance of words with each other.
*
* @param unknown_type $haystack
* @param unknown_type $need
@@ -86,358 +83,235 @@ function search_page_handler($page) {
* @param unknown_type $max_length
* @return unknown_type
*/
-function search_get_highlighted_relevant_substrings($haystack, $needle, $min_match_context = 15, $max_length = 500) {
+function search_get_highlighted_relevant_substrings($haystack, $query, $min_match_context = 30, $max_length = 300) {
global $CONFIG;
$haystack = strip_tags($haystack);
- $haystack_lc = strtolower($haystack);
-//
-// $haystack = "Like merge sort, quicksort can also be easily parallelized due to its "
-// . "divide-and-conquer nature. Individual in-place partition operations are difficult "
-// . "to parallelize, but once divided, different sections of the list can be sorted in parallel. "
-// . "If we have p processors, we can divide a list of n ele";
-//
-// $needle = 'difficult to sort in parallel';
-
- // for now don't worry about "s or boolean operators
- $needle = str_replace(array('"', '-', '+', '~'), '', stripslashes(strip_tags($needle)));
- $words = explode(' ', $needle);
+ $haystack_length = elgg_strlen($haystack);
+ $haystack_lc = elgg_strtolower($haystack);
- $min_chars = $CONFIG->search_info['min_chars'];
- // if > ft_min_word == not running in literal mode.
- if ($needle >= $min_chars) {
- // clean out any words that are ignored by mysql
- foreach ($words as $i => $word) {
- if (strlen($word) < $min_chars) {
- unset ($words[$i]);
- }
- }
- }
+ $words = search_remove_ignored_words($query, 'array');
- /*
+ // if haystack < $max_length return the entire haystack w/formatting immediately
+ if ($haystack_length <= $max_length) {
+ $return = search_highlight_words($words, $haystack);
- $body_len = 250
-
- $context = 5-30, 20-45, 75-100, 150
-
- can pull out context either on:
- one of each matching term
- X # of highest matching terms
+ return $return;
+ }
- */
- $substr_counts = array();
- $str_pos = array();
- // matrices for being and end context lengths.
- // defaults to min context. will add additional context later if needed
+ // get the starting positions and lengths for all matching words
$starts = array();
- $stops = array();
-
- // map the words to the starts and stops
- $words_arg = array();
- $context_count = 0;
-
-
- // get the full count of matches.
+ $lengths = array();
foreach ($words as $word) {
- $word = strtolower($word);
- $count = substr_count($haystack, $word);
- $word_len = strlen($word);
+ $word = elgg_strtolower($word);
+ $count = elgg_substr_count($haystack_lc, $word);
+ $word_len = elgg_strlen($word);
// find the start positions for the words
if ($count > 1) {
- $str_pos[$word] = array();
$offset = 0;
- while (FALSE !== $pos = strpos($haystack, $word, $offset)) {
- $str_pos[$word][] = $pos;
- $starts[] = ($pos - $min_match_context > 0) ? $pos - $min_match_context : 0;
- $stops[] = $pos + $word_len + $min_match_context;
- $words_arg[] = $word;
- $context_count += $min_match_context + $word_len;
+ while (FALSE !== $pos = elgg_strpos($haystack_lc, $word, $offset)) {
+ $start = ($pos - $min_match_context > 0) ? $pos - $min_match_context : 0;
+ $starts[] = $start;
+ $stop = $pos + $word_len + $min_match_context;
+ $lengths[] = $stop - $start;
$offset += $pos + $word_len;
}
} else {
- $pos = strpos($haystack, $word);
- $str_pos[$word] = array($pos);
- $starts[] = ($pos - $min_match_context > 0) ? $pos - $min_match_context : 0;
- $stops[] = $pos + $word_len + $min_match_context;
- $context_count += $min_match_context + $word_len;
- $words_arg[] = $word;
- }
- $substr_counts[$word] = $count;
- }
-
- // sort by order of occurence
- //krsort($substr_counts);
- $full_count = array_sum($substr_counts);
-
- // figure out what the context needs to be.
- // take one of each matched phrase
- // if there are any
-
-//
-// var_dump($str_pos);
-// var_dump($substr_counts);
-// var_dump($context_count);
-
-
- // sort to put them in order of occurence
- asort($starts, SORT_NUMERIC);
- asort($stops, SORT_NUMERIC);
-
- // offset them correctly
- $starts[] = 0;
- $new_stops = array(0);
- foreach ($stops as $i => $pos) {
- $new_stops[$i+1] = $pos;
- }
- $stops = $new_stops;
-
- $substrings = array();
- $len = count($starts);
-
- $starts = array_merge($starts);
- $stops = array_merge($stops);
-
- $offsets = array();
- $limits = array();
- $c = 0;
- foreach ($starts as $i => $start) {
- $stop = $stops[$i];
- $offsets[$c] = $start;
- $limits[$c] = $stop;
-
- // never need the last one as it's just a displacing entry
- if ($c+1 == count($starts)) {
- break;
+ $pos = elgg_strpos($haystack_lc, $word);
+ $start = ($pos - $min_match_context > 0) ? $pos - $min_match_context : 0;
+ $starts[] = $start;
+ $stop = $pos + $word_len + $min_match_context;
+ $lengths[] = $stop - $start;
}
-
- if ($start - $stop < 0) {
- //var_dump("Looking at c=$c & $start - $stop and going to unset {$limits[$c]}");
- unset($offsets[$c]);
- unset($limits[$c]);
- }
- $c++;
}
- // reset indexes and remove placeholder elements.
- $limits = array_merge($limits);
- array_shift($limits);
- $offsets = array_merge($offsets);
- array_pop($offsets);
+ $offsets = search_consolidate_substrings($starts, $lengths);
- // figure out if we need to adjust the offsets from the base
- // this could result in overlapping summaries.
- // might be nicer to just remove it.
-
- $total_len = 0;
- foreach ($offsets as $i => $offset) {
- $total_len += $limits[$i] - $offset;
- }
+ // figure out if we can adjust the offsets and lengths
+ // in order to return more context
+ $total_length = array_sum($offsets);
$add_length = 0;
if ($total_length < $max_length) {
- $add_length = floor((($max_length - $total_len) / count($offsets)) / 2);
- }
-
- $lengths = array();
- foreach ($offsets as $i => $offset) {
- $limit = $limits[$i];
- if ($offset == 0 && $add_length) {
- $limit += $add_length;
- } else {
- $offset = $offset - $add_length;
+ $add_length = floor((($max_length - $total_length) / count($offsets)) / 2);
+
+ $starts = array();
+ $lengths = array();
+ foreach ($offsets as $offset => $length) {
+ $start = ($offset - $add_length > 0) ? $offset - $add_length : 0;
+ $length = $length + $add_length;
+ $starts[] = $start;
+ $lengths[] = $length;
}
- $string = substr($haystack, $offset, $limit - $offset);
- if ($offset != 0) {
- $string = "...$string";
- }
-
- if ($limit + $offset >= strlen($haystack)) {
- $string .= '...';
- }
-
- $substrings[] = $string;
- $lengths[] = strlen($string);
+ $offsets = search_consolidate_substrings($starts, $lengths);
}
- // sort by length of context.
- asort($lengths);
+ // sort by order of string size descending (which is roughly
+ // the proximity of matched terms) so we can keep the
+ // substrings with terms closest together and discard
+ // the others as needed to fit within $max_length.
+ arsort($offsets);
- $matched = '';
- foreach ($lengths as $i => $len) {
- $string = $substrings[$i];
+ $return_strs = array();
+ $total_length = 0;
+ foreach ($offsets as $start => $length) {
+ $string = trim(elgg_substr($haystack, $start, $length));
- if (strlen($matched) + strlen($string) < $max_length) {
- $matched .= $string;
+ // continue past if adding this substring exceeds max length
+ if ($total_length + $length > $max_length) {
+ continue;
}
- }
- $i = 1;
- foreach ($words as $word) {
- $search = "/($word)/i";
- $replace = "<strong class=\"searchMatch searchMatchColor$i\">$1</strong>";
- $matched = preg_replace($search, $replace, $matched);
- $i++;
+ $total_length += $length;
+ $return_strs[$start] = $string;
}
- return $matched;
-
-
- // crap below..
-
-
-
- for ($i=0; $i<$len; $i++) {
- $start = $starts[$i];
- $stop = $stops[$i];
- var_dump("Looking at $i = $start - $stop");
-
- while ($start - $stop <= 0) {
- $stop = $stops[$i++];
- var_dump("New start is $stop");
- }
+ // put the strings in order of occurrence
+ ksort($return_strs);
- var_dump("$start-$stop");
+ // add ...s where needed
+ $return = implode('...', $return_strs);
+ if (!array_key_exists(0, $return_strs)) {
+ $return = "...$return";
}
- // find the intersecting contexts
- foreach ($starts as $i => $start_pos) {
- $words .= "{$words_arg[$i]}\t\t\t";
- echo "$start_pos\t\t\t";
+ // add to end of string if last substring doesn't hit the end.
+ $starts = array_keys($return_strs);
+ $last_pos = $starts[count($starts)-1];
+ if ($last_pos + elgg_strlen($return_strs[$last_pos]) < $haystack_length) {
+ $return .= '...';
}
- echo "\n";
+ $return = search_highlight_words($words, $return);
- foreach ($stops as $i => $stop_pos) {
- echo "$stop_pos\t\t\t";
- }
-echo "\n$words\n";
-
- // get full number of matches against all words to see how many we actually want to look at.
-
-
-
-
-// $desc = search_get_relevant_substring($entity->description, $params['query'], '<strong class="searchMatch">', '</strong>');
-
-
- $params['query'];
- // "this is"just a test "silly person"
+ return $return;
+}
- // check for "s
- $words_quotes = explode('"', $needle);
- $words_orig = explode(' ', $needle);
- $words = array();
+/**
+ * Takes an array of offsets and lengths and consolidates any
+ * overlapping entries, returning an array of new offsets and lengths
+ *
+ * Offsets and lengths are specified in separate arrays because of possible
+ * index collisions with the offsets.
+ *
+ * @param array $offsets
+ * @param array $lengths
+ * @return array
+ */
+function search_consolidate_substrings($offsets, $lengths) {
+ // sort offsets by occurence
+ asort($offsets, SORT_NUMERIC);
- foreach ($words_orig as $i => $word) {
- // figure out if we have a special operand
- $operand = substr($word, 0, 1);
- switch($operand) {
- case '"':
- // find the matching " if any. else, remove the "
- if (substr_count($query, '"') < 2) {
- $words[] = substr($word, 1);
- } else {
- $word = substr($word, 1);
- $word_i = $i;
- while ('"' != strpos($words_orig[$word_i], '"')) {
- $word .= " {$words_orig[$word_i]}";
- unset($words_orig[$word_i]);
- }
- }
+ // reset the indexes maintaining association with the original offsets.
+ $offsets = array_merge($offsets);
- break;
+ $new_lengths = array();
+ foreach ($offsets as $i => $offset) {
+ $new_lengths[] = $lengths[$i];
+ }
- case '+':
- // remove +
- $words[] = substr($word, 1);
- break;
+ $lengths = $new_lengths;
- case '~':
- case '-':
- // remove this from highlighted list.
+ $return = array();
+ $count = count($offsets);
+ for ($i=0; $i<$count; $i++) {
+ $offset = $offsets[$i];
+ $length = $lengths[$i];
+ $end_pos = $offset + $length;
+ // find the next entry that doesn't overlap
+ while(array_key_exists($i+1, $offsets) && $end_pos > $offsets[$i+1]) {
+ $i++;
+ if (!array_key_exists($i, $offsets)) {
break;
+ }
+ $end_pos = $lengths[$i] + $offsets[$i];
}
- }
- // pick out " queries
- if (substr_count($query, '"') >= 2) {
+ $length = $end_pos - $offset;
+ // will never have a colliding offset, so can return as a single array
+ $return[$offset] = $length;
}
- // ignore queries starting with -
-
-
- // @todo figure out a way to "center" the matches within the max_length.
- // if only one match, its context is $context + $max_length / 2
- // if 2 matches, its context is $context + $max_length / 4
- // if 3 matches, its context is $context + $max_length / 6
- // $context per match = $min_match_context + ($max_length / $num_count_match)
-
- // if $max_length / ($matched_count * 2) < $context
- // only match against the first X matches where $context >= $context
+ return $return;
}
/**
- * Returns a matching string with $context amount of context, optionally
- * surrounded by $before and $after.
- *
- * If no match is found, restricts string to $context*2 starting from strpos 0.
+ * Safely highlights the words in $words found in $string avoiding recursion
*
- * @param str $haystack
- * @param str $needle
- * @param str $before
- * @param str $after
- * @param int $context
- * @return str
+ * @param array $words
+ * @param string $string
+ * @return string
*/
-function search_get_relevant_substring($haystack, $needle, $before = '', $after = '', $context = 75) {
- $haystack = strip_tags($haystack);
- $needle = strip_tags($needle);
-
- $pos = strpos(strtolower($haystack), strtolower($needle));
+function search_highlight_words($words, $string) {
+ $i = 1;
+ $replace_html = array(
+ 'strong' => rand(10000,99999),
+ 'class' => rand(10000,99999),
+ 'searchMatch' => rand(10000,99999),
+ 'searchMatchColor' => rand(10000,99999)
+ );
- if ($pos === FALSE) {
- $str = substr($haystack, 0, $context*2);
- if (strlen($haystack) > $context*2) {
- $str .= '...';
- }
+ foreach ($words as $word) {
+ $search = "/($word)/i";
- return $str;
+ // must replace with placeholders in case one of the search terms is
+ // in the html string.
+ // later, will replace the placeholders with the actual html.
+ // Yeah this is hacky. I'm tired.
+ $strong = $replace_html['strong'];
+ $class = $replace_html['class'];
+ $searchMatch = $replace_html['searchMatch'];
+ $searchMatchColor = $replace_html['searchMatchColor'];
+
+ $replace = "<$strong $class=\"$searchMatch $searchMatchColor{$i}\">$1</$strong>";
+ $string = preg_replace($search, $replace, $string);
+ $i++;
}
- $start_pos = $pos - $context;
-
- if ($start_pos < 0) {
- $start_pos = 0;
+ foreach ($replace_html as $replace => $search) {
+ $string = str_replace($search, $replace, $string);
}
- // get string from -context to +context
- $matched = substr($haystack, $start_pos, $context*2);
+ return $string;
+}
- // add elipses to front.
- if ($start_pos > 0) {
- $matched = "...$matched";
- }
+/**
+ * Returns a query with stop and too short words removed.
+ * (Unless the entire query is < ft_min_word_chars, in which case
+ * it's taken literally.)
+ *
+ * @param array $query
+ * @param str $format Return as an array or a string
+ * @return mixed
+ */
+function search_remove_ignored_words($query, $format = 'array') {
+ global $CONFIG;
+
+ // don't worry about "s or boolean operators
+ $query = str_replace(array('"', '-', '+', '~'), '', stripslashes(strip_tags($query)));
+ $words = explode(' ', $query);
- // add elipses to end.
- if ($pos + strlen($needle) + $context*2 < strlen($haystack)) {
- $matched = "$matched...";
+ $min_chars = $CONFIG->search_info['min_chars'];
+ // if > ft_min_word we're not running in literal mode.
+ if ($query >= $min_chars) {
+ // clean out any words that are ignored by mysql
+ foreach ($words as $i => $word) {
+ if (elgg_strlen($word) < $min_chars) {
+ unset ($words[$i]);
+ }
+ }
}
- // surround if needed
- // @todo would getting each position of the match then
- // inserting manually based on the position be faster than preg_replace()?
- if ($before || $after) {
- $matched = str_ireplace($needle, $before . $needle . $after, $matched);
- //$matched = mb_ereg_replace("")
- // insert before
+ if ($format == 'string') {
+ return implode(' ', $words);
}
- return $matched;
+ return $words;
}
@@ -498,7 +372,7 @@ function search_get_listing_html($entities, $count, $params) {
* @param array $params Original search params
* @return str
*/
-function search_get_where_sql($table, $fields, $params) {
+function search_get_where_sql($table, $fields, $params, $use_fulltext = TRUE) {
global $CONFIG;
$query = $params['query'];
@@ -507,49 +381,49 @@ function search_get_where_sql($table, $fields, $params) {
$fields[$i] = "$table.$field";
}
+ // if we're not using full text, rewrite the query for bool mode.
+ // exploiting a feature(ish) of bool mode where +-word is the same as -word
+ if (!$use_fulltext) {
+ $query = '+' . str_replace(' ', ' +', $query);
+ }
+
// if query is shorter than the min for fts words
// it's likely a single acronym or similar
// switch to literal mode
- if (strlen($query) < $CONFIG->search_info['min_chars']) {
+ if (elgg_strlen($query) < $CONFIG->search_info['min_chars']) {
$likes = array();
$query = sanitise_string($query);
foreach ($fields as $field) {
$likes[] = "$field LIKE '%$query%'";
}
$likes_str = implode(' OR ', $likes);
- //$where = "($table.guid = e.guid AND ($likes_str))";
$where = "($likes_str)";
} else {
// if using advanced or paired "s, switch into boolean mode
- if ((isset($params['advanced_search']) && $params['advanced_search']) || substr_count($query, '"') >= 2 ) {
+ if (!$use_fulltext
+ || (isset($params['advanced_search']) && $params['advanced_search'])
+ || elgg_substr_count($query, '"') >= 2 ) {
$options = 'IN BOOLEAN MODE';
} else {
- // natural language mode is default and this keyword isn't supported
- // in < 5.1
+ // natural language mode is default and this keyword isn't supported in < 5.1
//$options = 'IN NATURAL LANGUAGE MODE';
$options = '';
}
// if short query, use query expansion.
- if (strlen($query) < 6) {
+ // @todo doesn't seem to be working well.
+ if (elgg_strlen($query) < 5) {
//$options .= ' WITH QUERY EXPANSION';
}
$query = sanitise_string($query);
- // if query is shorter than the ft_min_word_len switch to literal mode.
$fields_str = implode(',', $fields);
- //$where = "($table.guid = e.guid AND (MATCH ($fields_str) AGAINST ('$query' $options)))";
$where = "(MATCH ($fields_str) AGAINST ('$query' $options))";
}
return $where;
}
-function search_get_query_where_sql($table, $query) {
- // if there are multiple "s or 's it's a literal string.
-
-}
-
/** Register init system event **/
register_elgg_event_handler('init','system','search_init'); \ No newline at end of file
diff --git a/mod/search/views/default/page_elements/searchbox.php b/mod/search/views/default/page_elements/searchbox.php
index cfc0b953b..f7746b0da 100644
--- a/mod/search/views/default/page_elements/searchbox.php
+++ b/mod/search/views/default/page_elements/searchbox.php
@@ -1,4 +1,16 @@
+<?php
+
+if (array_key_exists('value', $vars)) {
+ $value = $vars['value'];
+} elseif ($value = get_input('q', get_input('tag', NULL))) {
+ $value = $value;
+} else {
+ $value = elgg_echo('search');
+}
+
+?>
+
<form id="searchform" action="<?php echo $vars['url']; ?>pg/search/" method="get">
- <input type="text" size="21" name="q" value="<?php echo elgg_echo('search'); ?>" onclick="if (this.value=='<?php echo elgg_echo('search'); ?>') { this.value='' }" class="search_input" />
+ <input type="text" size="21" name="q" value="<?php echo $value; ?>" onclick="if (this.value=='<?php echo elgg_echo('search'); ?>') { this.value='' }" class="search_input" />
<input type="submit" value="<?php echo elgg_echo('search:go'); ?>" class="search_submit_button" />
</form>
diff --git a/mod/search/views/default/search/comments/listing.php b/mod/search/views/default/search/comments/listing.php
index 58353a110..13f368f2b 100644
--- a/mod/search/views/default/search/comments/listing.php
+++ b/mod/search/views/default/search/comments/listing.php
@@ -7,16 +7,12 @@
* @author Curverider Ltd
* @link http://elgg.org/
*/
-?>
-<div class="search_listing">
-<?php
if (!is_array($vars['entities']) || !count($vars['entities'])) {
return FALSE;
}
$title_str = elgg_echo('comments');
-$body = elgg_view_title($title_str);
$query = htmlspecialchars(http_build_query(
array(
@@ -30,9 +26,33 @@ $query = htmlspecialchars(http_build_query(
));
$url = "{$vars['url']}pg/search?$query";
-$more = "<a href=\"$url\">+$count more $title_str</a>";
-echo elgg_view('page_elements/contentwrapper', array('body' => $body));
+// get pagination
+if (array_key_exists('pagination', $vars) && $vars['pagination']) {
+ $nav .= elgg_view('navigation/pagination',array(
+ 'baseurl' => $url,
+ 'offset' => $vars['params']['offset'],
+ 'count' => $vars['count'],
+ 'limit' => $vars['params']['limit'],
+ ));
+} else {
+ $nav = '';
+}
+
+// get more links
+$more_check = $vars['count'] - ($vars['params']['offset'] + $vars['params']['limit']);
+$more = ($more_check > 0) ? $more_check : 0;
+
+if ($more) {
+ $title_key = ($more == 1) ? 'comment' : 'comments';
+ $more_str = sprintf(elgg_echo('search:more'), $vars['count'], elgg_echo($title_key));
+ $more_link = "<a href=\"$url\">$more_str</a>";
+} else {
+ $more_link = '';
+}
+
+echo $nav;
+$body = elgg_view_title($title_str);
foreach ($vars['entities'] as $entity) {
if ($owner = $entity->getOwnerEntity()) {
@@ -41,22 +61,46 @@ foreach ($vars['entities'] as $entity) {
} else {
$icon = '';
}
- $title = "Comment on " . elgg_echo('item:' . $entity->getType() . ':' . $entity->getSubtype());
+
+ // @todo Sometimes we find comments on entities we can't display...
+ if ($entity->getVolatileData('search_unavailable_entity')) {
+ $title = sprintf(elgg_echo('search:comment_on'), elgg_echo('search:unavailable_entity'));
+ // keep anchor for formatting.
+ $title = "<a>$title</a>";
+ } else {
+ if ($entity->getType() == 'object') {
+ $title = $entity->title;
+ } else {
+ $title = $entity->name;
+ }
+
+ if (!$title) {
+ $title = elgg_echo('item:' . $entity->getType() . ':' . $entity->getSubtype());
+ }
+
+ if (!$title) {
+ $title = elgg_echo('item:' . $entity->getType());
+ }
+
+ $title = sprintf(elgg_echo('search:comment_on'), $title);
+ $url = $entity->getURL() . '#annotation-' . $entity->getVolatileData('search_match_annotation_id');
+ $title = "<a href=\"$url\">$title</a>";
+ }
+
$description = $entity->getVolatileData('search_matched_comment');
- $url = $entity->getURL();
- $title = "<a href=\"$url\">$title</a>";
$tc = $entity->getVolatileData('search_matched_comment_time_created');;
$time = friendly_time($tc);
- echo <<<___END
+ $body .= <<<___END
<span class="searchListing">
<h3 class="searchTitle">$title</h3>
<span class="searchDetails">
<span class="searchDescription">$description</span><br />
- $icon $time - $more</a>
+ $icon $time - $more_link</a>
</span>
</span>
___END;
}
-?>
-</div>
+
+$body .= $nav;
+echo elgg_view('page_elements/contentwrapper', array('body' => $body));
diff --git a/mod/search/views/default/search/css.php b/mod/search/views/default/search/css.php
index 28dc82a4a..19cca02e8 100644
--- a/mod/search/views/default/search/css.php
+++ b/mod/search/views/default/search/css.php
@@ -38,7 +38,6 @@ margin: 6px;
background-color: #99FF99;
}
-
.searchTitle {
text-decoration: underline;
}
@@ -91,7 +90,7 @@ margin: 6px;
}
/* override the entity container piece */
-.search_listing .entity_listing {
+.search_listing .search_listing {
-webkit-border-radius: 0px;
-moz-border-radius: 0px;
background: transparent;
diff --git a/mod/search/views/default/search/listing.php b/mod/search/views/default/search/listing.php
index 37850c911..f947bd808 100644
--- a/mod/search/views/default/search/listing.php
+++ b/mod/search/views/default/search/listing.php
@@ -7,11 +7,8 @@
* @author Curverider Ltd
* @link http://elgg.org/
*/
-?>
-<div class="search_listing">
-<?php
$entities = $vars['entities'];
$count = $vars['count'] - count($vars['entities']);
@@ -19,24 +16,52 @@ if (!is_array($vars['entities']) || !count($vars['entities'])) {
return FALSE;
}
-$title_str = elgg_echo("item:{$vars['params']['type']}:{$vars['params']['subtype']}");
-$body = elgg_view_title($title_str);
+// figure out what we're dealing with.
+if (array_key_exists('type', $vars['params']) && array_key_exists('subtype', $vars['params'])) {
+ $type_str = elgg_echo("item:{$vars['params']['type']}:{$vars['params']['subtype']}");
+} elseif (array_key_exists('type', $vars['params'])) {
+ $type_str = elgg_echo("item:{$vars['params']['type']}");
+} else {
+ $type_str = elgg_echo('search:unknown_entity');
+}
$query = htmlspecialchars(http_build_query(
array(
'q' => $vars['params']['query'],
'entity_type' => $vars['params']['type'],
'entity_subtype' => $vars['params']['subtype'],
- 'limit' => get_input('limit', 10),
- 'offset' => get_input('offset', 0),
'search_type' => 'entities',
)
));
$url = "{$vars['url']}pg/search?$query";
-$more = "<a href=\"$url\">+$count more $title_str</a>";
-echo elgg_view('page_elements/contentwrapper', array('body' => $body));
+// get pagination
+if (array_key_exists('pagination', $vars['params']) && $vars['params']['pagination']) {
+ $nav .= elgg_view('navigation/pagination',array(
+ 'baseurl' => $url,
+ 'offset' => $vars['params']['offset'],
+ 'count' => $vars['count'],
+ 'limit' => $vars['params']['limit'],
+ ));
+} else {
+ $nav = '';
+}
+
+// get any more links.
+$more_check = $vars['count'] - ($vars['params']['offset'] + $vars['params']['limit']);
+$more = ($more_check > 0) ? $more_check : 0;
+
+if ($more) {
+ $title_key = ($more == 1) ? 'comment' : 'comments';
+ $more_str = sprintf(elgg_echo('search:more'), $count, $type_str);
+ $more_link = "<a href=\"$url\">$more_str</a>";
+} else {
+ $more_link = '';
+}
+
+echo $nav;
+$body = elgg_view_title($title_str);
foreach ($entities as $entity) {
if ($owner = $entity->getOwnerEntity()) {
@@ -53,16 +78,14 @@ foreach ($entities as $entity) {
$tu = $entity->time_updated;
$time = friendly_time(($tu > $tc) ? $tu : $tc);
- echo <<<___END
+ $body .= <<<___END
<span class="searchListing">
<h3 class="searchTitle">$title</h3>
- <span class="searchDetails">
- <span class="searchDescription">$description</span><br />
- $icon $time - $more</a>
- </span>
+ <span class="searchDescription">$description</span><br />
+ <span class="searchInfo">$icon $time - $more_link</span>
</span>
___END;
}
-?>
-</div> \ No newline at end of file
+echo elgg_view('page_elements/contentwrapper', array('body' => $body));
+echo $nav; \ No newline at end of file