From c5cc2821311012a8a4385a304a043c4b41f2afbb Mon Sep 17 00:00:00 2001 From: brettp Date: Mon, 31 Aug 2009 19:05:21 +0000 Subject: All line endings are now Unix-style. git-svn-id: https://code.elgg.org/elgg/trunk@3451 36083f99-b078-4883-b0ff-0f9b5a30f544 --- engine/lib/social.php | 214 +++++++++++++++++++++++++------------------------- 1 file changed, 107 insertions(+), 107 deletions(-) (limited to 'engine/lib/social.php') diff --git a/engine/lib/social.php b/engine/lib/social.php index b00b56d79..e8bfccda5 100644 --- a/engine/lib/social.php +++ b/engine/lib/social.php @@ -1,76 +1,76 @@ -wordblacklist)) - return $input; - - if (strlen($input) < 3 || in_array($input,$CONFIG->wordblacklist)) - return false; - - return true; - - } - + * @author Curverider + + * @link http://elgg.org/ + + /** + * Filters a string into an array of significant words + * + * @param string $string + * @return array + */ + function filter_string($string) { + + // Convert it to lower and trim + $string = strtolower($string); + $string = trim($string); + + // Remove links and email addresses + // match protocol://address/path/file.extension?some=variable&another=asf% + $string = preg_replace("/\s([a-zA-Z]+:\/\/[a-z][a-z0-9\_\.\-]*[a-z]{2,6}[a-zA-Z0-9\/\*\-\?\&\%\=]*)([\s|\.|\,])/iu"," ", $string); + // match www.something.domain/path/file.extension?some=variable&another=asf% + $string = preg_replace("/\s(www\.[a-z][a-z0-9\_\.\-]*[a-z]{2,6}[a-zA-Z0-9\/\*\-\?\&\%\=]*)([\s|\.|\,])/iu"," ", $string); + // match name@address + $string = preg_replace("/\s([a-zA-Z][a-zA-Z0-9\_\.\-]*[a-zA-Z]*\@[a-zA-Z][a-zA-Z0-9\_\.\-]*[a-zA-Z]{2,6})([\s|\.|\,])/iu"," ", $string); + + // Sanitise the string; remove unwanted characters + $string = preg_replace('/\W/ui', ' ', $string); + + // Explode it into an array + $terms = explode(' ',$string); + + // Remove any blacklist terms + //$terms = array_filter($terms, 'remove_blacklist'); + + return $terms; + + } + + /** + * Returns true if the word in $input is considered significant + * + * @param string $input + * @return true|false + */ + function remove_blacklist($input) { + + global $CONFIG; + + if (!is_array($CONFIG->wordblacklist)) + return $input; + + if (strlen($input) < 3 || in_array($input,$CONFIG->wordblacklist)) + return false; + + return true; + + } + /** * Initialise. * * Sets a blacklist of words in the current language. This is a comma separated list in word:blacklist. - */ - function social_init() { + */ + function social_init() { global $CONFIG; $CONFIG->wordblacklist = array(); @@ -83,46 +83,46 @@ } else { - // Fallback - shouldn't happen - $CONFIG->wordblacklist = array( - 'and', - 'the', - 'then', - 'but', - 'she', - 'his', - 'her', - 'him', - 'one', - 'not', - 'also', - 'about', - 'now', - 'hence', - 'however', - 'still', - 'likewise', - 'otherwise', - 'therefore', - 'conversely', - 'rather', - 'consequently', - 'furthermore', - 'nevertheless', - 'instead', - 'meanwhile', - 'accordingly', - 'this', - 'seems', - 'what', - 'whom', - 'whose', - 'whoever', - 'whomever', + // Fallback - shouldn't happen + $CONFIG->wordblacklist = array( + 'and', + 'the', + 'then', + 'but', + 'she', + 'his', + 'her', + 'him', + 'one', + 'not', + 'also', + 'about', + 'now', + 'hence', + 'however', + 'still', + 'likewise', + 'otherwise', + 'therefore', + 'conversely', + 'rather', + 'consequently', + 'furthermore', + 'nevertheless', + 'instead', + 'meanwhile', + 'accordingly', + 'this', + 'seems', + 'what', + 'whom', + 'whose', + 'whoever', + 'whomever', ); - } - } - - register_elgg_event_handler("init","system","social_init"); - + } + } + + register_elgg_event_handler("init","system","social_init"); + ?> \ No newline at end of file -- cgit v1.2.3