<?php

    /**
	 * Elgg Social
	 * Functions and objects which provide powerful social aspects within Elgg
	 * 
	 * @package Elgg
	 * @subpackage Core
	 * @license http://www.gnu.org/licenses/old-licenses/gpl-2.0.html GNU Public License version 2
	 * @author Curverider
	 * @copyright Curverider Ltd 2008
	 * @link http://elgg.org/
	 */
	 /**
	  * Filters a string into an array of significant words.
	  *
	  * The text is lower-cased and trimmed, links and email addresses are
	  * removed, every remaining non-word character is turned into a space, and
	  * the resulting words are passed through remove_blacklist().
	  *
	  * @param string $string The free text to analyse
	  * @return array Sequentially indexed list of the significant words; empty
	  *               when nothing is left or the text cannot be processed by PCRE
	  */
        function filter_string(string $string) {

            // Convert it to lower case and trim. strtolower() only folds ASCII
            // letters, so use the multibyte variant whenever mbstring is loaded.
               $string = function_exists('mb_strtolower')
                   ? mb_strtolower($string, 'UTF-8')
                   : strtolower($string);
               $string = trim($string);

            // Remove links and email addresses.
            // Each token has to start at the beginning of the text or after
            // whitespace, and has to be followed by whitespace, '.', ',', '|' or
            // the end of the text. The boundaries are look-arounds so they are not
            // consumed: tokens at either edge of the (trimmed) text and tokens
            // that directly follow one another are all caught.
               $noise = array(
                   // match protocol://address/path/file.extension?some=variable&another=asf%
                   '#(?<!\S)[a-z]+://[a-z][a-z0-9_.\-]*[a-z]{2,6}[a-z0-9/*\-?&%=._]*(?=[\s|.,]|\z)#iu',
                   // match www.something.domain/path/file.extension?some=variable&another=asf%
                   '#(?<!\S)www\.[a-z][a-z0-9_.\-]*[a-z]{2,6}[a-z0-9/*\-?&%=._]*(?=[\s|.,]|\z)#iu',
                   // match name@address
                   '#(?<!\S)[a-z][a-z0-9_.\-]*[a-z]*@[a-z][a-z0-9_.\-]*[a-z]{2,6}(?=[\s|.,]|\z)#iu',
               );
               $string = preg_replace($noise, ' ', $string);

            // Sanitise the string; remove unwanted characters
               if ($string !== null) {
                   $string = preg_replace('/\W/u', ' ', $string);
               }

            // preg_replace() returns null when PCRE fails (for instance on
            // malformed UTF-8 with the /u modifier); there are no usable terms then.
               if ($string === null) {
                   return array();
               }

            // Explode it into an array
               $terms = explode(' ', $string);

            // Remove any blacklist terms. array_filter() keeps the original keys,
            // so re-index to hand back a gap-free list.
               $terms = array_values(array_filter($terms, 'remove_blacklist'));

               return $terms;

        }
        
        /**
         * Decides whether a single word is significant enough to keep.
         *
         * A word is rejected when strlen() reports fewer than three bytes or
         * when it appears in $CONFIG->wordblacklist. Used as the array_filter()
         * callback in filter_string().
         *
         * @param string $input The candidate word
         * @return bool True when the word is significant, false otherwise
         */
        function remove_blacklist($input) {

            global $CONFIG;

            // Anything shorter than three bytes is never significant
            $long_enough = strlen($input) >= 3;

            // The blacklist is only consulted for words that are long enough
            return $long_enough && !in_array($input, $CONFIG->wordblacklist);

        }
        
        // Set the default word blacklist: words on this list are rejected by
        // remove_blacklist() and therefore never returned by filter_string().
        // Any language packs should include their own!

            global $CONFIG;

            // Assigning a property on null throws an Error on PHP 8, so create
            // the configuration object when nothing has set it up yet. An
            // existing object is left untouched.
            if (!isset($CONFIG)) {
                $CONFIG = new stdClass();
            }

            $CONFIG->wordblacklist = array(
                'and',
                'the',
                'then',
                'but',
                'she',
                'his',
                'her',
                'him',
                'one',
                'not',
                'also',
                'about',
                'now',
                'hence',
                'however',
                'still',
                'likewise',
                'otherwise',
                'therefore',
                'conversely',
                'rather',
                'consequently',
                'furthermore',
                'nevertheless',
                'instead',
                'meanwhile',
                'accordingly',
                'this',
                'seems',
                'what',
                'whom',
                'whose',
                'whoever',
                'whomever',
            );
	 
?>