<?php

/**
 * Elgg Social
 * Functions and objects which provide powerful social aspects within Elgg
 *
 * @package Elgg
 * @subpackage Core
 * @license http://www.gnu.org/licenses/old-licenses/gpl-2.0.html GNU Public License version 2
 * @author Curverider
 * @copyright Curverider Ltd 2008
 * @link http://elgg.org/
 */

/**
 * Filters a string into an array of significant words
 *
 * The string is lower-cased and trimmed, then links (protocol://... and
 * www....) and email addresses are stripped out, every remaining non-word
 * character is turned into a space, and the result is split on spaces.
 * Words rejected by remove_blacklist() are dropped.
 *
 * Input that is not valid UTF-8 (or that makes PCRE fail) yields an empty array.
 *
 * @param string $string The text to break into words
 * @return array Sequentially indexed list of the significant words, in order of appearance
 */
function filter_string(string $string): array
{
    // Every pattern below uses the /u modifier, which cannot process invalid UTF-8
    if (preg_match('//u', $string) !== 1) {
        return [];
    }

    // Convert it to lower and trim; prefer the multibyte-aware variant so
    // that non-ASCII letters are folded as well
    $string = function_exists('mb_strtolower')
        ? mb_strtolower($string, 'UTF-8')
        : strtolower($string);
    $string = trim($string);

    // An address must start at the beginning of the string or after whitespace,
    // and must end at the end of the string or before whitespace, '.', ',' or '|'.
    // These are lookarounds so that the separator between two adjacent
    // addresses is not consumed by the first match.
    $start = '(?<!\S)';
    $end   = '(?=[\s.,|]|$)';
    $host  = '[a-z][a-z0-9_.\-]*[a-z]{2,6}';
    $path  = '[a-z0-9/*\-?&%=._]*';

    $patterns = [
        // match protocol://address/path/file.extension?some=variable&another=asf%
        '~' . $start . '[a-z]+://' . $host . $path . $end . '~iu',
        // match www.something.domain/path/file.extension?some=variable&another=asf%
        '~' . $start . 'www\.' . $host . $path . $end . '~iu',
        // match name@address
        '~' . $start . '[a-z][a-z0-9_.\-]*@' . $host . $end . '~iu',
        // Sanitise the string; remove unwanted characters
        '~\W~u',
    ];

    foreach ($patterns as $pattern) {
        $string = preg_replace($pattern, ' ', $string);
        if ($string === null) {
            // PCRE error (e.g. backtrack limit): nothing sensible to return
            return [];
        }
    }

    // Explode it into an array
    $terms = explode(' ', $string);

    // Remove any blacklist terms (this also drops the empty strings produced
    // by consecutive spaces) and reindex so the result is a plain list
    return array_values(array_filter($terms, 'remove_blacklist'));
}

/**
 * Returns true if the word in $input is considered significant
 *
 * A word is significant when it is at least three bytes long and is not
 * listed in $CONFIG->wordblacklist. When no blacklist array is configured,
 * only the length check applies.
 *
 * @param string $input The word to check
 * @return bool
 */
function remove_blacklist($input): bool
{
    global $CONFIG;

    $word = (string) $input;

    // Length is measured in bytes, as it always has been
    if (strlen($word) < 3) {
        return false;
    }

    $blacklist = (isset($CONFIG->wordblacklist) && is_array($CONFIG->wordblacklist))
        ? $CONFIG->wordblacklist
        : [];

    return !in_array($word, $blacklist, true);
}

// Set the shout words blacklist, these do not become tags when the string is converted
// Any language packs should include their own!
global $CONFIG;

// Assigning a property on an unset (null) $CONFIG is a fatal Error on PHP 8,
// so make sure there is an object to hang the blacklist on.
if (!isset($CONFIG)) {
    $CONFIG = new stdClass();
}

$CONFIG->wordblacklist = [
    'and',
    'the',
    'then',
    'but',
    'she',
    'his',
    'her',
    'him',
    'one',
    'not',
    'also',
    'about',
    'now',
    'hence',
    'however',
    'still',
    'likewise',
    'otherwise',
    'therefore',
    'conversely',
    'rather',
    'consequently',
    'furthermore',
    'nevertheless',
    'instead',
    'meanwhile',
    'accordingly',
    'this',
    'seems',
    'what',
    'whom',
    'whose',
    'whoever',
    'whomever',
];