From 2d365ba900e40494abeb306e3a881c91e2099ba6 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Thu, 17 Jan 2013 11:05:16 -0500 Subject: Allow friend collection names to store arbitrary plain text --- actions/friends/collections/add.php | 2 +- views/default/output/access.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/actions/friends/collections/add.php b/actions/friends/collections/add.php index 9dc17b37e..e63a149f7 100644 --- a/actions/friends/collections/add.php +++ b/actions/friends/collections/add.php @@ -6,7 +6,7 @@ * @subpackage Friends.Collections */ -$collection_name = get_input('collection_name'); +$collection_name = htmlspecialchars(get_input('collection_name', '', false), ENT_QUOTES, 'UTF-8'); $friends = get_input('friends_collection'); if (!$collection_name) { diff --git a/views/default/output/access.php b/views/default/output/access.php index 91c5c721e..5c8d62c4d 100644 --- a/views/default/output/access.php +++ b/views/default/output/access.php @@ -11,7 +11,7 @@ if (isset($vars['entity']) && elgg_instanceof($vars['entity'])) { $access_id = $vars['entity']->access_id; $access_class = 'elgg-access'; $access_id_string = get_readable_access_level($access_id); - $access_id_string = htmlentities($access_id_string, ENT_QUOTES, 'UTF-8'); + $access_id_string = htmlspecialchars($access_id_string, ENT_QUOTES, 'UTF-8', false); // if within a group or shared access collection display group name and open/closed membership status // @todo have a better way to do this instead of checking against subtype / class. 
-- cgit v1.2.3 From 1d11e783557ed5b1fe5ca73d2ad01d1bfc53dbfa Mon Sep 17 00:00:00 2001 From: cash Date: Tue, 19 Feb 2013 10:46:23 -0500 Subject: Refs #4970 prevent Firefox from adding files to TinyMCE editor --- mod/tinymce/views/default/js/tinymce.php | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mod/tinymce/views/default/js/tinymce.php b/mod/tinymce/views/default/js/tinymce.php index b4db43cee..344d71b14 100644 --- a/mod/tinymce/views/default/js/tinymce.php +++ b/mod/tinymce/views/default/js/tinymce.php @@ -66,6 +66,18 @@ elgg.tinymce.init = function() { var text = elgg.echo('tinymce:word_count') + strip.split(' ').length + ' '; tinymce.DOM.setHTML(tinymce.DOM.get(tinyMCE.activeEditor.id + '_path_row'), text); }); + + ed.onInit.add(function(ed) { + // prevent Firefox from dragging/dropping files into editor + if (tinymce.isGecko) { + tinymce.dom.Event.add(ed.getBody().parentNode, "drop", function(e) { + if (e.dataTransfer.files.length > 0) { + e.preventDefault(); + } + }); + } + }); + }, content_css: elgg.config.wwwroot + 'mod/tinymce/css/elgg_tinymce.css' }); -- cgit v1.2.3 From 2c7fe16e6d8d135109c6da60739e4ffad99876d5 Mon Sep 17 00:00:00 2001 From: Paweł Sroka Date: Wed, 6 Mar 2013 19:04:35 +0100 Subject: Refs #5199 - Adds additional info to locate output start in case of headers already sent exception --- engine/lib/elgglib.php | 4 ++-- languages/en.php | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/engine/lib/elgglib.php b/engine/lib/elgglib.php index 74b70f9fb..2ae307392 100644 --- a/engine/lib/elgglib.php +++ b/engine/lib/elgglib.php @@ -128,7 +128,7 @@ function elgg_load_library($name) { * @throws SecurityException */ function forward($location = "", $reason = 'system') { - if (!headers_sent()) { + if (!headers_sent($file, $line)) { if ($location === REFERER) { $location = $_SERVER['HTTP_REFERER']; } @@ -147,7 +147,7 @@ function forward($location = "", $reason = 'system') { exit; } } else { - throw new 
SecurityException(elgg_echo('SecurityException:ForwardFailedToRedirect')); + throw new SecurityException(elgg_echo('SecurityException:ForwardFailedToRedirect', array($file, $line))); } } diff --git a/languages/en.php b/languages/en.php index fe450b8a2..501855f02 100644 --- a/languages/en.php +++ b/languages/en.php @@ -175,7 +175,7 @@ $english = array( 'ConfigurationException:NoSiteID' => "No site ID has been specified.", 'SecurityException:APIAccessDenied' => "Sorry, API access has been disabled by the administrator.", 'SecurityException:NoAuthMethods' => "No authentication methods were found that could authenticate this API request.", - 'SecurityException:ForwardFailedToRedirect' => 'Redirect could not be issued due to headers already being sent. Halting execution for security. Search http://docs.elgg.org/ for more information.', + 'SecurityException:ForwardFailedToRedirect' => 'Redirect could not be issued due to headers already being sent. Halting execution for security. Output started in file %s at line %d. 
Search http://docs.elgg.org/ for more information.', 'InvalidParameterException:APIMethodOrFunctionNotSet' => "Method or function not set in call in expose_method()", 'InvalidParameterException:APIParametersArrayStructure' => "Parameters array structure is incorrect for call to expose method '%s'", 'InvalidParameterException:UnrecognisedHttpMethod' => "Unrecognised http method %s for api method '%s'", -- cgit v1.2.3 From 4cd8bc8d68008f509ce97b2e31e1e5ccfec7bdf0 Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Fri, 15 Mar 2013 08:22:32 -0400 Subject: fixed some coding standards issues as detected by code sniffer --- engine/classes/ElggPriorityList.php | 13 +++++++++---- engine/classes/ElggTranslit.php | 18 ++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/engine/classes/ElggPriorityList.php b/engine/classes/ElggPriorityList.php index b5f8fe163..416df885c 100644 --- a/engine/classes/ElggPriorityList.php +++ b/engine/classes/ElggPriorityList.php @@ -165,9 +165,9 @@ class ElggPriorityList /** * Move an existing element to a new priority. 
* - * @param mixed $element The element to move - * @param int $new_priority The new priority for the element - * @param bool $strict Whether to check the type of the element match + * @param mixed $element The element to move + * @param int $new_priority The new priority for the element + * @param bool $strict Whether to check the type of the element match * @return bool */ public function move($element, $new_priority, $strict = false) { @@ -354,7 +354,12 @@ class ElggPriorityList return ($key !== NULL && $key !== FALSE); } - // Countable + /** + * Countable interface + * + * @see Countable::count() + * @return int + */ public function count() { return count($this->elements); } diff --git a/engine/classes/ElggTranslit.php b/engine/classes/ElggTranslit.php index 676c59fc8..79116fc01 100644 --- a/engine/classes/ElggTranslit.php +++ b/engine/classes/ElggTranslit.php @@ -20,11 +20,10 @@ * and is licensed under the LGPL. For more information, see * . * - * @author Konsta Vesterinen - * @author Jonathan H. Wage - * - * @author Steve Clay - * @package Elgg.Core + * @package Elgg.Core + * @author Konsta Vesterinen + * @author Jonathan H. 
Wage + * @author Steve Clay * * @access private Plugin authors should not use this directly */ @@ -32,8 +31,9 @@ class ElggTranslit { /** * Create a version of a string for embedding in a URL - * @param string $string a UTF-8 string - * @param string $separator + * + * @param string $string A UTF-8 string + * @param string $separator The character to separate words with * @return string */ static public function urlize($string, $separator = '-') { @@ -98,6 +98,7 @@ class ElggTranslit { /** * Transliterate Western multibyte chars to ASCII + * * @param string $utf8 a UTF-8 string * @return string */ @@ -247,6 +248,7 @@ class ElggTranslit { /** * Tests that "normalizer_normalize" exists and works + * * @return bool */ static public function hasNormalizerSupport() { @@ -255,7 +257,7 @@ class ElggTranslit { $form_c = "\xC3\x85"; // 'LATIN CAPITAL LETTER A WITH RING ABOVE' (U+00C5) $form_d = "A\xCC\x8A"; // A followed by 'COMBINING RING ABOVE' (U+030A) $ret = (function_exists('normalizer_normalize') - && $form_c === normalizer_normalize($form_d)); + && $form_c === normalizer_normalize($form_d)); } return $ret; } -- cgit v1.2.3 From a2ecf54d56d9f877e6f0f8ac6d841cee6187aac4 Mon Sep 17 00:00:00 2001 From: cash Date: Fri, 15 Mar 2013 11:18:05 -0400 Subject: more coding standard fixes --- engine/classes/ElggEntity.php | 11 +++++------ engine/classes/ElggGroup.php | 9 +++------ engine/classes/ElggMenuBuilder.php | 12 ++++++------ engine/classes/ElggObject.php | 9 +++------ engine/classes/ElggSite.php | 12 ++++-------- engine/classes/ElggTranslit.php | 26 +++++++++++++------------- engine/classes/ElggUser.php | 12 ++++-------- engine/lib/configuration.php | 4 ++-- engine/lib/elgglib.php | 6 +++--- engine/lib/languages.php | 3 +++ engine/lib/location.php | 2 +- engine/lib/metadata.php | 4 ++-- engine/lib/plugins.php | 2 +- engine/lib/relationships.php | 2 +- engine/lib/views.php | 20 +++++++++++--------- 15 files changed, 62 insertions(+), 72 deletions(-) diff --git 
a/engine/classes/ElggEntity.php b/engine/classes/ElggEntity.php index f44e73023..5a63c7b15 100644 --- a/engine/classes/ElggEntity.php +++ b/engine/classes/ElggEntity.php @@ -375,12 +375,11 @@ abstract class ElggEntity extends ElggData implements } return $result; - } - - // unsaved entity. store in temp array - // returning single entries instead of an array of 1 element is decided in - // getMetaData(), just like pulling from the db. - else { + } else { + // unsaved entity. store in temp array + // returning single entries instead of an array of 1 element is decided in + // getMetaData(), just like pulling from the db. + // // if overwrite, delete first if (!$multiple || !isset($this->temp_metadata[$name])) { $this->temp_metadata[$name] = array(); diff --git a/engine/classes/ElggGroup.php b/engine/classes/ElggGroup.php index 61f699f1a..7ab0bfa48 100644 --- a/engine/classes/ElggGroup.php +++ b/engine/classes/ElggGroup.php @@ -48,21 +48,18 @@ class ElggGroup extends ElggEntity $msg = elgg_echo('IOException:FailedToLoadGUID', array(get_class(), $guid->guid)); throw new IOException($msg); } - - // Is $guid is an ElggGroup? Use a copy constructor } else if ($guid instanceof ElggGroup) { + // $guid is an ElggGroup so this is a copy constructor elgg_deprecated_notice('This type of usage of the ElggGroup constructor was deprecated. Please use the clone method.', 1.7); foreach ($guid->attributes as $key => $value) { $this->attributes[$key] = $value; } - - // Is this is an ElggEntity but not an ElggGroup = ERROR! 
} else if ($guid instanceof ElggEntity) { + // @todo why separate from else throw new InvalidParameterException(elgg_echo('InvalidParameterException:NonElggGroup')); - - // Is it a GUID } else if (is_numeric($guid)) { + // $guid is a GUID so load entity if (!$this->load($guid)) { throw new IOException(elgg_echo('IOException:FailedToLoadGUID', array(get_class(), $guid))); } diff --git a/engine/classes/ElggMenuBuilder.php b/engine/classes/ElggMenuBuilder.php index 639e34755..198018f3c 100644 --- a/engine/classes/ElggMenuBuilder.php +++ b/engine/classes/ElggMenuBuilder.php @@ -235,8 +235,8 @@ class ElggMenuBuilder { /** * Compare two menu items by their display text * - * @param ElggMenuItem $a - * @param ElggMenuItem $b + * @param ElggMenuItem $a Menu item + * @param ElggMenuItem $b Menu item * @return bool */ public static function compareByText($a, $b) { @@ -253,8 +253,8 @@ class ElggMenuBuilder { /** * Compare two menu items by their identifiers * - * @param ElggMenuItem $a - * @param ElggMenuItem $b + * @param ElggMenuItem $a Menu item + * @param ElggMenuItem $b Menu item * @return bool */ public static function compareByName($a, $b) { @@ -271,8 +271,8 @@ class ElggMenuBuilder { /** * Compare two menu items by their priority * - * @param ElggMenuItem $a - * @param ElggMenuItem $b + * @param ElggMenuItem $a Menu item + * @param ElggMenuItem $b Menu item * @return bool * * @todo change name to compareByPriority diff --git a/engine/classes/ElggObject.php b/engine/classes/ElggObject.php index 6263f84f6..3cb76ffaf 100644 --- a/engine/classes/ElggObject.php +++ b/engine/classes/ElggObject.php @@ -66,21 +66,18 @@ class ElggObject extends ElggEntity { $msg = elgg_echo('IOException:FailedToLoadGUID', array(get_class(), $guid->guid)); throw new IOException($msg); } - - // Is $guid is an ElggObject? 
Use a copy constructor } else if ($guid instanceof ElggObject) { + // $guid is an ElggObject so this is a copy constructor elgg_deprecated_notice('This type of usage of the ElggObject constructor was deprecated. Please use the clone method.', 1.7); foreach ($guid->attributes as $key => $value) { $this->attributes[$key] = $value; } - - // Is this is an ElggEntity but not an ElggObject = ERROR! } else if ($guid instanceof ElggEntity) { + // @todo remove - do not need separate exception throw new InvalidParameterException(elgg_echo('InvalidParameterException:NonElggObject')); - - // Is it a GUID } else if (is_numeric($guid)) { + // $guid is a GUID so load if (!$this->load($guid)) { throw new IOException(elgg_echo('IOException:FailedToLoadGUID', array(get_class(), $guid))); } diff --git a/engine/classes/ElggSite.php b/engine/classes/ElggSite.php index 1a34df195..deba5087e 100644 --- a/engine/classes/ElggSite.php +++ b/engine/classes/ElggSite.php @@ -77,28 +77,24 @@ class ElggSite extends ElggEntity { $msg = elgg_echo('IOException:FailedToLoadGUID', array(get_class(), $guid->guid)); throw new IOException($msg); } - - // Is $guid is an ElggSite? Use a copy constructor } else if ($guid instanceof ElggSite) { + // $guid is an ElggSite so this is a copy constructor elgg_deprecated_notice('This type of usage of the ElggSite constructor was deprecated. Please use the clone method.', 1.7); foreach ($guid->attributes as $key => $value) { $this->attributes[$key] = $value; } - - // Is this is an ElggEntity but not an ElggSite = ERROR! 
} else if ($guid instanceof ElggEntity) { + // @todo remove and just use else clause throw new InvalidParameterException(elgg_echo('InvalidParameterException:NonElggSite')); - - // See if this is a URL } else if (strpos($guid, "http") !== false) { + // url so retrieve by url $guid = get_site_by_url($guid); foreach ($guid->attributes as $key => $value) { $this->attributes[$key] = $value; } - - // Is it a GUID } else if (is_numeric($guid)) { + // $guid is a GUID so load if (!$this->load($guid)) { throw new IOException(elgg_echo('IOException:FailedToLoadGUID', array(get_class(), $guid))); } diff --git a/engine/classes/ElggTranslit.php b/engine/classes/ElggTranslit.php index 79116fc01..601965c11 100644 --- a/engine/classes/ElggTranslit.php +++ b/engine/classes/ElggTranslit.php @@ -58,15 +58,15 @@ class ElggTranslit { // remove all ASCII except 0-9a-zA-Z, hyphen, underscore, and whitespace // note: "x" modifier did not work with this pattern. $string = preg_replace('~[' - . '\x00-\x08' # control chars - . '\x0b\x0c' # vert tab, form feed - . '\x0e-\x1f' # control chars - . '\x21-\x2c' # ! ... , - . '\x2e\x2f' # . slash - . '\x3a-\x40' # : ... @ - . '\x5b-\x5e' # [ ... ^ - . '\x60' # ` - . '\x7b-\x7f' # { ... DEL + . '\x00-\x08' // control chars + . '\x0b\x0c' // vert tab, form feed + . '\x0e-\x1f' // control chars + . '\x21-\x2c' // ! ... , + . '\x2e\x2f' // . slash + . '\x3a-\x40' // : ... @ + . '\x5b-\x5e' // [ ... ^ + . '\x60' // ` + . '\x7b-\x7f' // { ... DEL . ']~', '', $string); $string = strtr($string, '', ''); @@ -80,10 +80,10 @@ class ElggTranslit { // note: we cannot use [^0-9a-zA-Z] because that matches multibyte chars. // note: "x" modifier did not work with this pattern. $pattern = '~[' - . '\x00-\x2f' # controls ... slash - . '\x3a-\x40' # : ... @ - . '\x5b-\x60' # [ ... ` - . '\x7b-\x7f' # { ... DEL + . '\x00-\x2f' // controls ... slash + . '\x3a-\x40' // : ... @ + . '\x5b-\x60' // [ ... ` + . '\x7b-\x7f' // { ... DEL . 
']+~x'; // ['internationalization', 'and', '日本語'] diff --git a/engine/classes/ElggUser.php b/engine/classes/ElggUser.php index 6c1cdc1de..b80065b27 100644 --- a/engine/classes/ElggUser.php +++ b/engine/classes/ElggUser.php @@ -65,30 +65,26 @@ class ElggUser extends ElggEntity $msg = elgg_echo('IOException:FailedToLoadGUID', array(get_class(), $guid->guid)); throw new IOException($msg); } - - // See if this is a username } else if (is_string($guid)) { + // $guid is a username $user = get_user_by_username($guid); if ($user) { foreach ($user->attributes as $key => $value) { $this->attributes[$key] = $value; } } - - // Is $guid is an ElggUser? Use a copy constructor } else if ($guid instanceof ElggUser) { + // $guid is an ElggUser so this is a copy constructor elgg_deprecated_notice('This type of usage of the ElggUser constructor was deprecated. Please use the clone method.', 1.7); foreach ($guid->attributes as $key => $value) { $this->attributes[$key] = $value; } - - // Is this is an ElggEntity but not an ElggUser = ERROR! } else if ($guid instanceof ElggEntity) { + // @todo why have a special case here throw new InvalidParameterException(elgg_echo('InvalidParameterException:NonElggUser')); - - // Is it a GUID } else if (is_numeric($guid)) { + // $guid is a GUID so load entity if (!$this->load($guid)) { throw new IOException(elgg_echo('IOException:FailedToLoadGUID', array(get_class(), $guid))); } diff --git a/engine/lib/configuration.php b/engine/lib/configuration.php index a0f297f0c..55e5bbd36 100644 --- a/engine/lib/configuration.php +++ b/engine/lib/configuration.php @@ -486,9 +486,9 @@ function get_config($name, $site_guid = 0) { // @todo these haven't really been implemented in Elgg 1.8. Complete in 1.9. 
// show dep message if ($new_name) { - // $msg = "Config value $name has been renamed as $new_name"; + // $msg = "Config value $name has been renamed as $new_name"; $name = $new_name; - // elgg_deprecated_notice($msg, $dep_version); + // elgg_deprecated_notice($msg, $dep_version); } // decide from where to return the value diff --git a/engine/lib/elgglib.php b/engine/lib/elgglib.php index 74b70f9fb..281b23535 100644 --- a/engine/lib/elgglib.php +++ b/engine/lib/elgglib.php @@ -1383,8 +1383,8 @@ function elgg_http_build_url(array $parts, $html_encode = TRUE) { * add tokens to the action. The form view automatically handles * tokens. * - * @param string $url Full action URL - * @param bool $html_encode HTML encode the url? (default: false) + * @param string $url Full action URL + * @param bool $html_encode HTML encode the url? (default: false) * * @return string URL with action tokens * @since 1.7.0 @@ -1446,7 +1446,7 @@ function elgg_http_remove_url_query_element($url, $element) { * Adds an element or elements to a URL's query string. 
* * @param string $url The URL - * @param array $elements Key/value pairs to add to the URL + * @param array $elements Key/value pairs to add to the URL * * @return string The new URL with the query strings added * @since 1.7.0 diff --git a/engine/lib/languages.php b/engine/lib/languages.php index 17db14d98..61ba91ddb 100644 --- a/engine/lib/languages.php +++ b/engine/lib/languages.php @@ -139,6 +139,9 @@ function get_language() { return false; } +/** + * @access private + */ function _elgg_load_translations() { global $CONFIG; diff --git a/engine/lib/location.php b/engine/lib/location.php index b319bb3bb..1534c7d7b 100644 --- a/engine/lib/location.php +++ b/engine/lib/location.php @@ -139,7 +139,7 @@ function elgg_get_entities_from_location(array $options = array()) { /** * Returns a viewable list of entities from location * - * @param array $options + * @param array $options Options array * * @see elgg_list_entities() * @see elgg_get_entities_from_location() diff --git a/engine/lib/metadata.php b/engine/lib/metadata.php index 305e9918b..a1ebfa5f1 100644 --- a/engine/lib/metadata.php +++ b/engine/lib/metadata.php @@ -920,8 +920,8 @@ function elgg_get_metadata_cache() { * Invalidate the metadata cache based on options passed to various *_metadata functions * * @param string $action Action performed on metadata. 
"delete", "disable", or "enable" - * - * @param array $options Options passed to elgg_(delete|disable|enable)_metadata + * @param array $options Options passed to elgg_(delete|disable|enable)_metadata + * @return void */ function elgg_invalidate_metadata_cache($action, array $options) { // remove as little as possible, optimizing for common cases diff --git a/engine/lib/plugins.php b/engine/lib/plugins.php index f281b1416..6fc000cf9 100644 --- a/engine/lib/plugins.php +++ b/engine/lib/plugins.php @@ -865,7 +865,7 @@ function elgg_set_plugin_user_setting($name, $value, $user_guid = null, $plugin_ * Unsets a user-specific plugin setting * * @param string $name Name of the setting - * @param int $user_guid Defaults to logged in user + * @param int $user_guid Defaults to logged in user * @param string $plugin_id Defaults to contextual plugin name * * @return bool diff --git a/engine/lib/relationships.php b/engine/lib/relationships.php index fe0b8364d..b0cd627fc 100644 --- a/engine/lib/relationships.php +++ b/engine/lib/relationships.php @@ -363,7 +363,7 @@ $relationship_guid = NULL, $inverse_relationship = FALSE) { /** * Returns a viewable list of entities by relationship * - * @param array $options + * @param array $options Options array for retrieval of entities * * @see elgg_list_entities() * @see elgg_get_entities_from_relationship() diff --git a/engine/lib/views.php b/engine/lib/views.php index 7d8347863..c4b349fc6 100644 --- a/engine/lib/views.php +++ b/engine/lib/views.php @@ -1107,7 +1107,7 @@ function elgg_view_entity_annotations(ElggEntity $entity, $full_view = true) { * This is a shortcut for {@elgg_view page/elements/title}. * * @param string $title The page title - * @param array $vars View variables (was submenu be displayed? (deprecated)) + * @param array $vars View variables (was submenu be displayed? 
(deprecated)) * * @return string The HTML (etc) */ @@ -1179,7 +1179,7 @@ function elgg_view_comments($entity, $add_comment = true, array $vars = array()) * * @param string $image The icon and other information * @param string $body Description content - * @param array $vars Additional parameters for the view + * @param array $vars Additional parameters for the view * * @return string * @since 1.8.0 @@ -1236,15 +1236,17 @@ function elgg_view_river_item($item, array $vars = array()) { // subject is disabled or subject/object deleted return ''; } + + // @todo this needs to be cleaned up // Don't hide objects in closed groups that a user can see. // see http://trac.elgg.org/ticket/4789 -// else { -// // hide based on object's container -// $visibility = ElggGroupItemVisibility::factory($object->container_guid); -// if ($visibility->shouldHideItems) { -// return ''; -// } -// } + // else { + // // hide based on object's container + // $visibility = ElggGroupItemVisibility::factory($object->container_guid); + // if ($visibility->shouldHideItems) { + // return ''; + // } + // } $vars['item'] = $item; -- cgit v1.2.3 From ea4ce20b3632a3c55ffedfad1ad53845db5a7e12 Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 16 Mar 2013 12:03:21 -0400 Subject: coding standards --- engine/classes/ElggAccess.php | 4 ++++ engine/classes/ElggCache.php | 4 ++++ engine/classes/ElggData.php | 2 ++ engine/classes/ElggFileCache.php | 4 ++++ engine/classes/ElggXMLElement.php | 8 ++++++++ 5 files changed, 22 insertions(+) diff --git a/engine/classes/ElggAccess.php b/engine/classes/ElggAccess.php index 6f8d9bb4b..0aed477fc 100644 --- a/engine/classes/ElggAccess.php +++ b/engine/classes/ElggAccess.php @@ -16,6 +16,7 @@ class ElggAccess { */ private $ignore_access; + // @codingStandardsIgnoreStart /** * Get current ignore access setting. 
* @@ -26,6 +27,7 @@ class ElggAccess { elgg_deprecated_notice('ElggAccess::get_ignore_access() is deprecated by ElggAccess::getIgnoreAccess()', 1.8); return $this->getIgnoreAccess(); } + // @codingStandardsIgnoreEnd /** * Get current ignore access setting. @@ -36,6 +38,7 @@ class ElggAccess { return $this->ignore_access; } + // @codingStandardsIgnoreStart /** * Set ignore access. * @@ -49,6 +52,7 @@ class ElggAccess { elgg_deprecated_notice('ElggAccess::set_ignore_access() is deprecated by ElggAccess::setIgnoreAccess()', 1.8); return $this->setIgnoreAccess($ignore); } + // @codingStandardsIgnoreEnd /** * Set ignore access. diff --git a/engine/classes/ElggCache.php b/engine/classes/ElggCache.php index 4317f4be9..909eab39b 100644 --- a/engine/classes/ElggCache.php +++ b/engine/classes/ElggCache.php @@ -21,6 +21,7 @@ abstract class ElggCache implements ArrayAccess { $this->variables = array(); } + // @codingStandardsIgnoreStart /** * Set a cache variable. * @@ -35,6 +36,7 @@ abstract class ElggCache implements ArrayAccess { elgg_deprecated_notice('ElggCache::set_variable() is deprecated by ElggCache::setVariable()', 1.8); $this->setVariable($variable, $value); } + // @codingStandardsIgnoreEnd /** * Set a cache variable. @@ -52,6 +54,7 @@ abstract class ElggCache implements ArrayAccess { $this->variables[$variable] = $value; } + // @codingStandardsIgnoreStart /** * Get variables for this cache. * @@ -65,6 +68,7 @@ abstract class ElggCache implements ArrayAccess { elgg_deprecated_notice('ElggCache::get_variable() is deprecated by ElggCache::getVariable()', 1.8); return $this->getVariable($variable); } + // @codingStandardsIgnoreEnd /** * Get variables for this cache. 
diff --git a/engine/classes/ElggData.php b/engine/classes/ElggData.php index 426248ca3..4f843cde4 100644 --- a/engine/classes/ElggData.php +++ b/engine/classes/ElggData.php @@ -26,6 +26,7 @@ abstract class ElggData implements */ protected $attributes = array(); + // @codingStandardsIgnoreStart /** * Initialise the attributes array. * @@ -44,6 +45,7 @@ abstract class ElggData implements elgg_deprecated_notice('initialise_attributes() is deprecated by initializeAttributes()', 1.8); } } + // @codingStandardsIgnoreEnd /** * Initialize the attributes array. diff --git a/engine/classes/ElggFileCache.php b/engine/classes/ElggFileCache.php index e654f1db2..94143f777 100644 --- a/engine/classes/ElggFileCache.php +++ b/engine/classes/ElggFileCache.php @@ -26,6 +26,7 @@ class ElggFileCache extends ElggCache { } } + // @codingStandardsIgnoreStart /** * Create and return a handle to a file. * @@ -41,6 +42,7 @@ class ElggFileCache extends ElggCache { return $this->createFile($filename, $rw); } + // @codingStandardsIgnoreEnd /** * Create and return a handle to a file. @@ -72,6 +74,7 @@ class ElggFileCache extends ElggCache { return fopen($path . $filename, $rw); } + // @codingStandardsIgnoreStart /** * Create a sanitised filename for the file. * @@ -86,6 +89,7 @@ class ElggFileCache extends ElggCache { return $filename; } + // @codingStandardsIgnoreEnd /** * Create a sanitised filename for the file. 
diff --git a/engine/classes/ElggXMLElement.php b/engine/classes/ElggXMLElement.php index 4e4b7e63c..d7e912035 100644 --- a/engine/classes/ElggXMLElement.php +++ b/engine/classes/ElggXMLElement.php @@ -76,6 +76,10 @@ class ElggXMLElement { return $result; } + /** + * @param string $name Property name + * @return mixed + */ function __get($name) { switch ($name) { case 'name': @@ -94,6 +98,10 @@ class ElggXMLElement { return null; } + /** + * @param string $name Property name + * @return boolean + */ function __isset($name) { switch ($name) { case 'name': -- cgit v1.2.3 From 00819122111a081c17f1ae4c53974b0deb50757c Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 16 Mar 2013 12:41:16 -0400 Subject: more coding standard fixes --- engine/classes/ElggAttributeLoader.php | 30 ++++++++- engine/classes/ElggAutoP.php | 24 +++++--- engine/classes/ElggDiskFilestore.php | 9 +++ engine/classes/ElggVolatileMetadataCache.php | 92 +++++++++++++++------------- engine/classes/ElggXMLElement.php | 4 ++ engine/lib/opendd.php | 4 ++ 6 files changed, 110 insertions(+), 53 deletions(-) diff --git a/engine/classes/ElggAttributeLoader.php b/engine/classes/ElggAttributeLoader.php index 2d1c1abde..d1e15008e 100644 --- a/engine/classes/ElggAttributeLoader.php +++ b/engine/classes/ElggAttributeLoader.php @@ -4,6 +4,9 @@ * Loads ElggEntity attributes from DB or validates those passed in via constructor * * @access private + * + * @package Elgg.Core + * @subpackage DataModel */ class ElggAttributeLoader { @@ -65,9 +68,11 @@ class ElggAttributeLoader { public $full_loader = ''; /** - * @param string $class class of object being loaded - * @param string $required_type entity type this is being used to populate - * @param array $initialized_attrs attributes after initializeAttributes() has been run + * Constructor + * + * @param string $class class of object being loaded + * @param string $required_type entity type this is being used to populate + * @param array $initialized_attrs attributes after 
initializeAttributes() has been run * @throws InvalidArgumentException */ public function __construct($class, $required_type, array $initialized_attrs) { @@ -87,14 +92,33 @@ class ElggAttributeLoader { $this->secondary_attr_names = array_diff($all_attr_names, self::$primary_attr_names); } + /** + * Get primary attributes missing that are missing + * + * @param stdClass $row Database row + * @return array + */ protected function isMissingPrimaries($row) { return array_diff(self::$primary_attr_names, array_keys($row)) !== array(); } + /** + * Get secondary attributes that are missing + * + * @param stdClass $row Database row + * @return array + */ protected function isMissingSecondaries($row) { return array_diff($this->secondary_attr_names, array_keys($row)) !== array(); } + /** + * Check that the type is correct + * + * @param stdClass $row Database row + * @return void + * @throws InvalidClassException + */ protected function checkType($row) { if ($row['type'] !== $this->required_type) { $msg = elgg_echo('InvalidClassException:NotValidElggStar', array($row['guid'], $this->class)); diff --git a/engine/classes/ElggAutoP.php b/engine/classes/ElggAutoP.php index f3c7cc972..71536c433 100644 --- a/engine/classes/ElggAutoP.php +++ b/engine/classes/ElggAutoP.php @@ -7,6 +7,9 @@ * * In DIV elements, Ps are only added when there would be at * least two of them. + * + * @package Elgg.Core + * @subpackage Output */ class ElggAutoP { @@ -51,8 +54,12 @@ class ElggAutoP { protected $_alterList = 'article aside blockquote body details div footer header section'; + /** @var string */ protected $_unique = ''; + /** + * Constructor + */ public function __construct() { $this->_blocks = preg_split('@\\s+@', $this->_blocks); $this->_descendList = preg_split('@\\s+@', $this->_descendList); @@ -98,7 +105,7 @@ class ElggAutoP { $html = str_replace('&', $this->_unique . 
'AMP', $html); $this->_doc = new DOMDocument(); - + // parse to DOM, suppressing loadHTML warnings // http://www.php.net/manual/en/domdocument.loadhtml.php#95463 libxml_use_internal_errors(true); @@ -112,7 +119,7 @@ class ElggAutoP { $this->_xpath = new DOMXPath($this->_doc); // start processing recursively at the BODY element $nodeList = $this->_xpath->query('//body[1]'); - $this->_addParagraphs($nodeList->item(0)); + $this->addParagraphs($nodeList->item(0)); // serialize back to HTML $html = $this->_doc->saveHTML(); @@ -187,15 +194,16 @@ class ElggAutoP { /** * Add P and BR elements as necessary * - * @param DOMElement $el + * @param DOMElement $el DOM element + * @return void */ - protected function _addParagraphs(DOMElement $el) { + protected function addParagraphs(DOMElement $el) { // no need to call recursively, just queue up $elsToProcess = array($el); $inlinesToProcess = array(); while ($el = array_shift($elsToProcess)) { // if true, we can alter all child nodes, if not, we'll just call - // _addParagraphs on each element in the descendInto list + // addParagraphs on each element in the descendInto list $alterInline = in_array($el->nodeName, $this->_alterList); // inside affected elements, we want to trim leading whitespace from @@ -229,8 +237,8 @@ class ElggAutoP { if ($alterInline) { $isText = ($node->nodeType === XML_TEXT_NODE); $isLastInline = (! 
$node->nextSibling - || ($node->nextSibling->nodeType === XML_ELEMENT_NODE - && in_array($node->nextSibling->nodeName, $this->_blocks))); + || ($node->nextSibling->nodeType === XML_ELEMENT_NODE + && in_array($node->nextSibling->nodeName, $this->_blocks))); if ($isElement) { $isFollowingBr = ($node->nodeName === 'br'); } @@ -263,7 +271,7 @@ class ElggAutoP { if ($isBlock) { if (in_array($node->nodeName, $this->_descendList)) { $elsToProcess[] = $node; - //$this->_addParagraphs($node); + //$this->addParagraphs($node); } } $openP = true; diff --git a/engine/classes/ElggDiskFilestore.php b/engine/classes/ElggDiskFilestore.php index 7374aad35..29547d83b 100644 --- a/engine/classes/ElggDiskFilestore.php +++ b/engine/classes/ElggDiskFilestore.php @@ -254,6 +254,7 @@ class ElggDiskFilestore extends ElggFilestore { } } + // @codingStandardsIgnoreStart /** * Create a directory $dirroot * @@ -268,6 +269,7 @@ class ElggDiskFilestore extends ElggFilestore { return $this->makeDirectoryRoot($dirroot); } + // @codingStandardsIgnoreEnd /** * Create a directory $dirroot @@ -287,6 +289,7 @@ class ElggDiskFilestore extends ElggFilestore { return true; } + // @codingStandardsIgnoreStart /** * Multibyte string tokeniser. * @@ -318,7 +321,9 @@ class ElggDiskFilestore extends ElggFilestore { return str_split($string); } } + // @codingStandardsIgnoreEnd + // @codingStandardsIgnoreStart /** * Construct a file path matrix for an entity. * @@ -332,6 +337,7 @@ class ElggDiskFilestore extends ElggFilestore { return $this->makefileMatrix($identifier); } + // @codingStandardsIgnoreEnd /** * Construct a file path matrix for an entity. @@ -351,7 +357,9 @@ class ElggDiskFilestore extends ElggFilestore { return "$time_created/$entity->guid/"; } + // @codingStandardsIgnoreEnd + // @codingStandardsIgnoreStart /** * Construct a filename matrix. 
* @@ -370,6 +378,7 @@ class ElggDiskFilestore extends ElggFilestore { return $this->makeFileMatrix($guid); } + // @codingStandardsIgnoreEnd /** * Returns a list of attributes to save to the database when saving diff --git a/engine/classes/ElggVolatileMetadataCache.php b/engine/classes/ElggVolatileMetadataCache.php index 8a33c198d..4acda7cee 100644 --- a/engine/classes/ElggVolatileMetadataCache.php +++ b/engine/classes/ElggVolatileMetadataCache.php @@ -33,9 +33,11 @@ class ElggVolatileMetadataCache { protected $ignoreAccess = null; /** - * @param int $entity_guid - * - * @param array $values + * Cache metadata for an entity + * + * @param int $entity_guid The GUID of the entity + * @param array $values The metadata values to cache + * @return void */ public function saveAll($entity_guid, array $values) { if (!$this->getIgnoreAccess()) { @@ -45,8 +47,9 @@ class ElggVolatileMetadataCache { } /** - * @param int $entity_guid - * + * Get the metadata for an entity + * + * @param int $entity_guid The GUID of the entity * @return array */ public function loadAll($entity_guid) { @@ -61,15 +64,17 @@ class ElggVolatileMetadataCache { * Declare that there may be fetch-able metadata names in storage that this * cache doesn't know about * - * @param int $entity_guid + * @param int $entity_guid The GUID of the entity + * @return void */ public function markOutOfSync($entity_guid) { unset($this->isSynchronized[$entity_guid]); } /** - * @param $entity_guid - * + * Have all the metadata for this entity been cached? 
+ * + * @param int $entity_guid The GUID of the entity * @return bool */ public function isSynchronized($entity_guid) { @@ -77,13 +82,15 @@ class ElggVolatileMetadataCache { } /** - * @param int $entity_guid - * - * @param string $name - * - * @param array|int|string|null $value null means it is known that there is no - * fetch-able metadata under this name - * @param bool $allow_multiple + * Cache a piece of metadata + * + * @param int $entity_guid The GUID of the entity + * @param string $name The metadata name + * @param array|int|string|null $value The metadata value. null means it is + * known that there is no fetch-able + * metadata under this name + * @param bool $allow_multiple Can the metadata be an array + * @return void */ public function save($entity_guid, $name, $value, $allow_multiple = false) { if ($this->getIgnoreAccess()) { @@ -115,10 +122,8 @@ class ElggVolatileMetadataCache { * function's return value should be trusted (otherwise a null return value * is ambiguous). * - * @param int $entity_guid - * - * @param string $name - * + * @param int $entity_guid The GUID of the entity + * @param string $name The metadata name * @return array|string|int|null null = value does not exist */ public function load($entity_guid, $name) { @@ -133,9 +138,9 @@ class ElggVolatileMetadataCache { * Forget about this metadata entry. 
We don't want to try to guess what the * next fetch from storage will return * - * @param int $entity_guid - * - * @param string $name + * @param int $entity_guid The GUID of the entity + * @param string $name The metadata name + * @return void */ public function markUnknown($entity_guid, $name) { unset($this->values[$entity_guid][$name]); @@ -145,10 +150,8 @@ class ElggVolatileMetadataCache { /** * If true, load() will return an accurate value for this name * - * @param int $entity_guid - * - * @param string $name - * + * @param int $entity_guid The GUID of the entity + * @param string $name The metadata name * @return bool */ public function isKnown($entity_guid, $name) { @@ -163,10 +166,8 @@ class ElggVolatileMetadataCache { /** * Declare that metadata under this name is known to be not fetch-able from storage * - * @param int $entity_guid - * - * @param string $name - * + * @param int $entity_guid The GUID of the entity + * @param string $name The metadata name * @return array */ public function markEmpty($entity_guid, $name) { @@ -176,7 +177,8 @@ class ElggVolatileMetadataCache { /** * Forget about all metadata for an entity * - * @param int $entity_guid + * @param int $entity_guid The GUID of the entity + * @return void */ public function clear($entity_guid) { $this->values[$entity_guid] = array(); @@ -185,6 +187,8 @@ class ElggVolatileMetadataCache { /** * Clear entire cache and mark all entities as out of sync + * + * @return void */ public function flush() { $this->values = array(); @@ -197,7 +201,8 @@ class ElggVolatileMetadataCache { * * This setting makes this component a little more loosely-coupled. * - * @param bool $ignore + * @param bool $ignore Whether to ignore access or not + * @return void */ public function setIgnoreAccess($ignore) { $this->ignoreAccess = (bool) $ignore; @@ -205,12 +210,16 @@ class ElggVolatileMetadataCache { /** * Tell the cache to call elgg_get_ignore_access() to determing access status. 
+ * + * @return void */ public function unsetIgnoreAccess() { $this->ignoreAccess = null; } /** + * Get the ignore access value + * * @return bool */ protected function getIgnoreAccess() { @@ -225,12 +234,10 @@ class ElggVolatileMetadataCache { * Invalidate based on options passed to the global *_metadata functions * * @param string $action Action performed on metadata. "delete", "disable", or "enable" - * - * @param array $options Options passed to elgg_(delete|disable|enable)_metadata - * - * "guid" if given, invalidation will be limited to this entity - * - * "metadata_name" if given, invalidation will be limited to metadata with this name + * @param array $options Options passed to elgg_(delete|disable|enable)_metadata + * "guid" if given, invalidation will be limited to this entity + * "metadata_name" if given, invalidation will be limited to metadata with this name + * @return void */ public function invalidateByOptions($action, array $options) { // remove as little as possible, optimizing for common cases @@ -254,7 +261,10 @@ class ElggVolatileMetadataCache { } /** - * @param int|array $guids + * Populate the cache from a set of entities + * + * @param int|array $guids Array of or single GUIDs + * @return void */ public function populateFromEntities($guids) { if (empty($guids)) { @@ -318,9 +328,7 @@ class ElggVolatileMetadataCache { * cache if RAM usage becomes an issue. 
* * @param array $guids GUIDs of entities to examine - * - * @param int $limit Limit in characters of all metadata (with ints casted to strings) - * + * @param int $limit Limit in characters of all metadata (with ints casted to strings) * @return array */ public function filterMetadataHeavyEntities(array $guids, $limit = 1024000) { diff --git a/engine/classes/ElggXMLElement.php b/engine/classes/ElggXMLElement.php index d7e912035..6f2633e25 100644 --- a/engine/classes/ElggXMLElement.php +++ b/engine/classes/ElggXMLElement.php @@ -77,6 +77,8 @@ class ElggXMLElement { } /** + * Override -> + * * @param string $name Property name * @return mixed */ @@ -99,6 +101,8 @@ class ElggXMLElement { } /** + * Override isset + * * @param string $name Property name * @return boolean */ diff --git a/engine/lib/opendd.php b/engine/lib/opendd.php index f00ea6aab..7d635a295 100644 --- a/engine/lib/opendd.php +++ b/engine/lib/opendd.php @@ -7,6 +7,8 @@ * @version 0.4 */ +// @codingStandardsIgnoreStart + /** * Attempt to construct an ODD object out of a XmlElement or sub-elements. 
* @@ -103,3 +105,5 @@ function ODD_Import($xml) { function ODD_Export(ODDDocument $document) { return "$document"; } + +// @codingStandardsIgnoreEnd -- cgit v1.2.3 From c79f4894d5e4bf88023e286dc03259cfa3f98414 Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 16 Mar 2013 12:43:12 -0400 Subject: fixed extra ignore --- engine/classes/ElggDiskFilestore.php | 1 - 1 file changed, 1 deletion(-) diff --git a/engine/classes/ElggDiskFilestore.php b/engine/classes/ElggDiskFilestore.php index 29547d83b..ded653436 100644 --- a/engine/classes/ElggDiskFilestore.php +++ b/engine/classes/ElggDiskFilestore.php @@ -357,7 +357,6 @@ class ElggDiskFilestore extends ElggFilestore { return "$time_created/$entity->guid/"; } - // @codingStandardsIgnoreEnd // @codingStandardsIgnoreStart /** -- cgit v1.2.3 From 36755bea9aefd7e8bf54deab7b29902f8733f9aa Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 16 Mar 2013 13:32:02 -0400 Subject: engine now is standards compliant --- engine/classes/ElggPlugin.php | 4 ++-- engine/lib/plugins.php | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/engine/classes/ElggPlugin.php b/engine/classes/ElggPlugin.php index ae447bddb..c1c46f272 100644 --- a/engine/classes/ElggPlugin.php +++ b/engine/classes/ElggPlugin.php @@ -649,8 +649,8 @@ class ElggPlugin extends ElggObject { // Note: this will not run re-run the init hooks! 
if ($return) { if ($this->canReadFile('activate.php')) { - $flags = ELGG_PLUGIN_INCLUDE_START | ELGG_PLUGIN_REGISTER_CLASSES - | ELGG_PLUGIN_REGISTER_LANGUAGES | ELGG_PLUGIN_REGISTER_VIEWS; + $flags = ELGG_PLUGIN_INCLUDE_START | ELGG_PLUGIN_REGISTER_CLASSES | + ELGG_PLUGIN_REGISTER_LANGUAGES | ELGG_PLUGIN_REGISTER_VIEWS; $this->start($flags); diff --git a/engine/lib/plugins.php b/engine/lib/plugins.php index 6fc000cf9..74bce45fd 100644 --- a/engine/lib/plugins.php +++ b/engine/lib/plugins.php @@ -312,10 +312,10 @@ function elgg_is_active_plugin($plugin_id, $site_guid = null) { */ function elgg_load_plugins() { $plugins_path = elgg_get_plugins_path(); - $start_flags = ELGG_PLUGIN_INCLUDE_START - | ELGG_PLUGIN_REGISTER_VIEWS - | ELGG_PLUGIN_REGISTER_LANGUAGES - | ELGG_PLUGIN_REGISTER_CLASSES; + $start_flags = ELGG_PLUGIN_INCLUDE_START | + ELGG_PLUGIN_REGISTER_VIEWS | + ELGG_PLUGIN_REGISTER_LANGUAGES | + ELGG_PLUGIN_REGISTER_CLASSES; if (!$plugins_path) { return false; -- cgit v1.2.3 From de9e434c2b1e606873beaee4d6e8b851a8f59fdb Mon Sep 17 00:00:00 2001 From: Matt Beckett Date: Tue, 19 Mar 2013 21:27:47 -0600 Subject: Allow logs to rotate without being deleted --- mod/logrotate/languages/en.php | 1 + mod/logrotate/start.php | 7 +++++-- mod/logrotate/views/default/plugins/logrotate/settings.php | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/mod/logrotate/languages/en.php b/mod/logrotate/languages/en.php index 27731d732..3af83e553 100644 --- a/mod/logrotate/languages/en.php +++ b/mod/logrotate/languages/en.php @@ -20,6 +20,7 @@ $english = array( 'logrotate:week' => 'week', 'logrotate:month' => 'month', 'logrotate:year' => 'year', + 'logrotate:never' => 'never', 'logrotate:logdeleted' => "Log deleted\n", 'logrotate:lognotdeleted' => "Error deleting log\n", diff --git a/mod/logrotate/start.php b/mod/logrotate/start.php index 28f14ad14..313cf1fd5 100644 --- a/mod/logrotate/start.php +++ b/mod/logrotate/start.php @@ -21,8 +21,11 @@ function 
logrotate_init() { // Register cron hook for archival of logs elgg_register_plugin_hook_handler('cron', $period, 'logrotate_archive_cron'); - // Register cron hook for deletion of selected archived logs - elgg_register_plugin_hook_handler('cron', $delete, 'logrotate_delete_cron'); + + if ($delete != 'never') { + // Register cron hook for deletion of selected archived logs + elgg_register_plugin_hook_handler('cron', $delete, 'logrotate_delete_cron'); + } } /** diff --git a/mod/logrotate/views/default/plugins/logrotate/settings.php b/mod/logrotate/views/default/plugins/logrotate/settings.php index bef8b308d..9fd3e08df 100644 --- a/mod/logrotate/views/default/plugins/logrotate/settings.php +++ b/mod/logrotate/views/default/plugins/logrotate/settings.php @@ -40,6 +40,7 @@ if (!$delete) { 'weekly' => elgg_echo('logrotate:week'), 'monthly' => elgg_echo('logrotate:month'), 'yearly' => elgg_echo('logrotate:year'), + 'never' => elgg_echo('logrotate:never'), ), 'value' => $delete, )); -- cgit v1.2.3 From b8bd4161058a2f539f1913a1dd9856f9e759c79c Mon Sep 17 00:00:00 2001 From: Matt Beckett Date: Wed, 20 Mar 2013 15:10:57 -0600 Subject: only show notification status if you're a member of the group --- mod/groups/views/default/groups/sidebar/my_status.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mod/groups/views/default/groups/sidebar/my_status.php b/mod/groups/views/default/groups/sidebar/my_status.php index 5951cbd28..1e4e84b80 100644 --- a/mod/groups/views/default/groups/sidebar/my_status.php +++ b/mod/groups/views/default/groups/sidebar/my_status.php @@ -41,7 +41,7 @@ if ($is_owner) { } // notification info -if (elgg_is_active_plugin('notifications')) { +if (elgg_is_active_plugin('notifications') && $is_member) { if ($subscribed) { elgg_register_menu_item('groups:my_status', array( 'name' => 'subscription_status', -- cgit v1.2.3 From 4da579033674ecdb134bc921f3f0666072419e6c Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Wed, 20 Mar 2013 21:00:41 
-0400 Subject: Fixes #5232 handling empty profile url field --- actions/profile/edit.php | 2 +- mod/profile/views/default/profile/details.php | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/actions/profile/edit.php b/actions/profile/edit.php index 63fb31600..b817463ac 100644 --- a/actions/profile/edit.php +++ b/actions/profile/edit.php @@ -48,7 +48,7 @@ foreach ($profile_fields as $shortname => $valuetype) { forward(REFERER); } - if ($valuetype == 'url' && !preg_match('~^https?\://~i', $value)) { + if ($value && $valuetype == 'url' && !preg_match('~^https?\://~i', $value)) { $value = "http://$value"; } diff --git a/mod/profile/views/default/profile/details.php b/mod/profile/views/default/profile/details.php index 167f995ae..15df6c2fd 100644 --- a/mod/profile/views/default/profile/details.php +++ b/mod/profile/views/default/profile/details.php @@ -22,13 +22,13 @@ if (is_array($profile_fields) && sizeof($profile_fields) > 0) { } $value = $user->$shortname; - // validate urls - if ($valtype == 'url' && !preg_match('~^https?\://~i', $value)) { - $value = "http://$value"; - } - if (!empty($value)) { - //This function controls the alternating class + // validate urls + if ($valtype == 'url' && !preg_match('~^https?\://~i', $value)) { + $value = "http://$value"; + } + + // this controls the alternating class $even_odd = ( 'odd' != $even_odd ) ? 'odd' : 'even'; ?>
-- cgit v1.2.3 From 33015121e7ec691372a56be0c5ef03d24d9c3aac Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Thu, 21 Mar 2013 10:48:20 -0400 Subject: Auto-fixes profile URLs broken by #5232 --- mod/profile/views/default/profile/details.php | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mod/profile/views/default/profile/details.php b/mod/profile/views/default/profile/details.php index 15df6c2fd..da4e95690 100644 --- a/mod/profile/views/default/profile/details.php +++ b/mod/profile/views/default/profile/details.php @@ -23,6 +23,14 @@ if (is_array($profile_fields) && sizeof($profile_fields) > 0) { $value = $user->$shortname; if (!empty($value)) { + + // fix profile URLs populated by https://github.com/Elgg/Elgg/issues/5232 + // @todo Replace with upgrade script, only need to alter users with last_update after 1.8.13 + if ($valtype == 'url' && $value == 'http://') { + $user->$shortname = ''; + continue; + } + // validate urls if ($valtype == 'url' && !preg_match('~^https?\://~i', $value)) { $value = "http://$value"; -- cgit v1.2.3 From e757ad0cffd35507d6430b8635e2b934db58650f Mon Sep 17 00:00:00 2001 From: cash Date: Thu, 21 Mar 2013 19:47:08 -0400 Subject: Fixes #5268 adds warning for wrapping views --- mod/developers/languages/en.php | 3 ++- mod/developers/start.php | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/mod/developers/languages/en.php b/mod/developers/languages/en.php index 856efe008..266b5406e 100644 --- a/mod/developers/languages/en.php +++ b/mod/developers/languages/en.php @@ -28,7 +28,8 @@ $english = array( 'developers:label:show_strings' => "Show raw translation strings", 'developers:help:show_strings' => "This displays the translation strings used by elgg_echo().", 'developers:label:wrap_views' => "Wrap views", - 'developers:help:wrap_views' => "This wraps almost every view with HTML comments. 
Useful for finding the view creating particular HTML.", + 'developers:help:wrap_views' => "This wraps almost every view with HTML comments. Useful for finding the view creating particular HTML. + This can break non-HTML views in the default viewtype. See developers_wrap_views() for details.", 'developers:label:log_events' => "Log events and plugin hooks", 'developers:help:log_events' => "Write events and plugin hooks to the log. Warning: there are many of these per page.", diff --git a/mod/developers/start.php b/mod/developers/start.php index 413a8ed9b..94d0f652c 100644 --- a/mod/developers/start.php +++ b/mod/developers/start.php @@ -89,6 +89,15 @@ function developers_clear_strings() { /** * Post-process a view to add wrapper comments to it + * + * 1. Only process views served with the 'default' viewtype. + * 2. Does not wrap views that begin with js/ or css/ as they are not HTML. + * 3. Does not wrap views that are images (start with icon/). Is this still true? + * 4. Does not wrap input and output views (why?). + * 5. Does not wrap html head or the primary page shells + * + * @warning this will break views in the default viewtype that return non-HTML data + * that do not match the above restrictions. */ function developers_wrap_views($hook, $type, $result, $params) { if (elgg_get_viewtype() != "default") { -- cgit v1.2.3 From 7e098f94738607c7d263b76277278f9ef4d5d209 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Thu, 21 Mar 2013 22:50:57 -0400 Subject: Updated copyright. 
--- COPYRIGHT.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index 76781f25a..262515386 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -6,6 +6,7 @@ The MITRE Corportation (jricher@mitre.org) Curverider Ltd (info@elgg.com) Individuals: +Steve Clay (steve@mrclay.org) Cash Costello (cash.costello@gmail.com) Brett Profitt (brett.profitt@gmail.com) Dave Tosh (davidgtosh@gmail.com) -- cgit v1.2.3 From 64b411dbb879000649967b22508ff925033de718 Mon Sep 17 00:00:00 2001 From: Tom Date: Fri, 22 Mar 2013 09:49:00 +0100 Subject: Update navigation.php --- views/default/css/elements/navigation.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/views/default/css/elements/navigation.php b/views/default/css/elements/navigation.php index 62f370069..43da99dff 100644 --- a/views/default/css/elements/navigation.php +++ b/views/default/css/elements/navigation.php @@ -16,7 +16,8 @@ text-align: center; } .elgg-pagination li { - display: inline; + display: inline-block; + height: 16px; margin: 0 6px 0 0; text-align: center; } -- cgit v1.2.3 From fb6740130460560c88ab0f61a14fededb73a5950 Mon Sep 17 00:00:00 2001 From: Tom Date: Fri, 22 Mar 2013 09:49:48 +0100 Subject: Update ie7.php --- views/default/css/ie7.php | 1 + 1 file changed, 1 insertion(+) diff --git a/views/default/css/ie7.php b/views/default/css/ie7.php index 229df8431..90274797d 100644 --- a/views/default/css/ie7.php +++ b/views/default/css/ie7.php @@ -24,6 +24,7 @@ .elgg-menu-footer > li > a, .elgg-menu-footer li, .elgg-menu-general > li > a, +.elgg-pagination li, .elgg-menu-general li { display: inline; } -- cgit v1.2.3 From 5a8e0b7d91c20b842d7414036bd6a1e04f4f2fcc Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Sun, 24 Mar 2013 19:07:27 -0400 Subject: Prevent pagination LIs from overlapping when wrapping --- views/default/css/elements/navigation.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/views/default/css/elements/navigation.php 
b/views/default/css/elements/navigation.php index 43da99dff..49e36e494 100644 --- a/views/default/css/elements/navigation.php +++ b/views/default/css/elements/navigation.php @@ -17,7 +17,6 @@ } .elgg-pagination li { display: inline-block; - height: 16px; margin: 0 6px 0 0; text-align: center; } @@ -25,7 +24,8 @@ -webkit-border-radius: 4px; -moz-border-radius: 4px; border-radius: 4px; - + + display: block; padding: 2px 6px; color: #4690d6; border: 1px solid #4690d6; -- cgit v1.2.3 From 5b4d84398065f09da522aa0e1b239cce9dc6e7bb Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 30 Mar 2013 10:45:39 -0400 Subject: Fixes #4587 not passing empty string to date() --- mod/logbrowser/views/default/forms/logbrowser/refine.php | 4 ++-- mod/logbrowser/views/default/logbrowser/refine.php | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mod/logbrowser/views/default/forms/logbrowser/refine.php b/mod/logbrowser/views/default/forms/logbrowser/refine.php index ebf7f10ed..3d081c9c2 100644 --- a/mod/logbrowser/views/default/forms/logbrowser/refine.php +++ b/mod/logbrowser/views/default/forms/logbrowser/refine.php @@ -9,12 +9,12 @@ * @uses $vars['timeupper'] */ -if (isset($vars['timelower'])) { +if (isset($vars['timelower']) && $vars['timelower']) { $lowerval = date('r', $vars['timelower']); } else { $lowerval = ""; } -if (isset($vars['timeupper'])) { +if (isset($vars['timeupper']) && $vars['timeupper']) { $upperval = date('r', $vars['timeupper']); } else { $upperval = ""; diff --git a/mod/logbrowser/views/default/logbrowser/refine.php b/mod/logbrowser/views/default/logbrowser/refine.php index 86460c79e..b40f23fa3 100644 --- a/mod/logbrowser/views/default/logbrowser/refine.php +++ b/mod/logbrowser/views/default/logbrowser/refine.php @@ -19,7 +19,7 @@ $toggle_link = elgg_view('output/url', array( )); $form_class = 'elgg-module elgg-module-inline'; -if (!isset($vars['user_guid'])) { +if (!isset($vars['user_guid']) && !isset($vars['username'])) { $form_class .= ' hidden'; 
} -- cgit v1.2.3 From d167a3b62afd6c07cf6c9257c49d5dd77958a58e Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 30 Mar 2013 11:16:09 -0400 Subject: Fixes #4349 friends can edit pages now --- mod/pages/start.php | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/mod/pages/start.php b/mod/pages/start.php index 6d974f122..8debeef24 100644 --- a/mod/pages/start.php +++ b/mod/pages/start.php @@ -281,25 +281,37 @@ function page_notify_message($hook, $entity_type, $returnvalue, $params) { /** * Extend permissions checking to extend can-edit for write users. * - * @param unknown_type $hook - * @param unknown_type $entity_type - * @param unknown_type $returnvalue - * @param unknown_type $params + * @param string $hook + * @param string $entity_type + * @param bool $returnvalue + * @param array $params */ -function pages_write_permission_check($hook, $entity_type, $returnvalue, $params) -{ +function pages_write_permission_check($hook, $entity_type, $returnvalue, $params) { if ($params['entity']->getSubtype() == 'page' || $params['entity']->getSubtype() == 'page_top') { $write_permission = $params['entity']->write_access_id; $user = $params['user']; - if (($write_permission) && ($user)) { - // $list = get_write_access_array($user->guid); - $list = get_access_array($user->guid); // get_access_list($user->guid); - - if (($write_permission!=0) && (in_array($write_permission,$list))) { - return true; + if ($write_permission && $user) { + switch ($write_permission) { + case ACCESS_PRIVATE: + // Elgg's default decision is what we want + return; + break; + case ACCESS_FRIENDS: + $owner = $params['entity']->getOwnerEntity(); + if ($owner && $owner->isFriendsWith($user->guid)) { + return true; + } + break; + default: + $list = get_access_array($user->guid); + if (in_array($write_permission, $list)) { + // user in the access collection + return true; + } + break; } } } -- cgit v1.2.3 From cb8a932702a66a8d8ec69982441e71bde8fd1b5c Mon 
Sep 17 00:00:00 2001 From: cash Date: Sat, 30 Mar 2013 12:04:19 -0400 Subject: Fixes #4867 handling boolean false values as metadata pair value --- engine/lib/metadata.php | 2 ++ engine/tests/api/metadata.php | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/engine/lib/metadata.php b/engine/lib/metadata.php index a1ebfa5f1..ad926a49a 100644 --- a/engine/lib/metadata.php +++ b/engine/lib/metadata.php @@ -619,6 +619,8 @@ $owner_guids = NULL) { // if the operand is IN don't quote it because quoting should be done already. if (is_numeric($pair['value'])) { $value = sanitise_string($pair['value']); + } else if (is_bool($pair['value'])) { + $value = (int) $pair['value']; } else if (is_array($pair['value'])) { $values_array = array(); diff --git a/engine/tests/api/metadata.php b/engine/tests/api/metadata.php index 825290d80..0862341c1 100644 --- a/engine/tests/api/metadata.php +++ b/engine/tests/api/metadata.php @@ -123,6 +123,20 @@ class ElggCoreMetadataAPITest extends ElggCoreUnitTest { $e->delete(); } + /** + * https://github.com/Elgg/Elgg/issues/4867 + */ + public function testElggGetEntityMetadataWhereSqlWithFalseValue() { + $pair = array('name' => 'test' , 'value' => false); + $result = elgg_get_entity_metadata_where_sql('e', 'metadata', null, null, $pair); + $where = preg_replace( '/\s+/', ' ', $result['wheres'][0]); + $this->assertTrue(strpos($where, "msn1.string = 'test' AND BINARY msv1.string = 0") > 0); + + $result = elgg_get_entity_metadata_where_sql('e', 'metadata', array('test'), array(false)); + $where = preg_replace( '/\s+/', ' ', $result['wheres'][0]); + $this->assertTrue(strpos($where, "msn.string IN ('test')) AND ( BINARY msv.string IN ('0')")); + } + // Make sure metadata with multiple values is correctly deleted when re-written // by another user // http://trac.elgg.org/ticket/2776 -- cgit v1.2.3 From bc2bc90bd5282d08ed87dc374f4135d8182c931b Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 30 Mar 2013 12:15:23 -0400 Subject: Refs #4356 
do not display elgg_dump() messages on js or css requests --- engine/lib/elgglib.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/engine/lib/elgglib.php b/engine/lib/elgglib.php index f4b1a0a3e..a49b620ac 100644 --- a/engine/lib/elgglib.php +++ b/engine/lib/elgglib.php @@ -1185,6 +1185,11 @@ function elgg_dump($value, $to_screen = TRUE, $level = 'NOTICE') { $to_screen = FALSE; } + // Do not want to write to JS or CSS pages + if (elgg_in_context('js') || elgg_in_context('css')) { + $to_screen = FALSE; + } + if ($to_screen == TRUE) { echo '
';
 		print_r($value);
-- 
cgit v1.2.3


From 52a1bf6fd37021cb6bd1843898571ba2036fbf9d Mon Sep 17 00:00:00 2001
From: cash 
Date: Sat, 30 Mar 2013 12:32:00 -0400
Subject: Fixes #5083 removes messages sent by deleted user

---
 mod/messages/start.php | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/mod/messages/start.php b/mod/messages/start.php
index 5503a675a..714129e4b 100644
--- a/mod/messages/start.php
+++ b/mod/messages/start.php
@@ -51,6 +51,9 @@ function messages_init() {
 	elgg_register_plugin_hook_handler('notify:entity:message', 'object', 'messages_notification_msg');
 	register_notification_object('object', 'messages', elgg_echo('messages:new'));
 
+	// delete messages sent by a user when user is deleted
+	elgg_register_event_handler('delete', 'user', 'messages_purge');
+
 	// ecml
 	elgg_register_plugin_hook_handler('get_views', 'ecml', 'messages_ecml_views_hook');
 
@@ -425,6 +428,35 @@ function messages_user_hover_menu($hook, $type, $return, $params) {
 	return $return;
 }
 
+/**
+ * Delete messages from a user who is being deleted
+ *
+ * @param string   $event Event name
+ * @param string   $type  Event type
+ * @param ElggUser $user  User being deleted
+ */
+function messages_purge($event, $type, $user) {
+
+	// make sure we delete them all
+	$entity_disable_override = access_get_show_hidden_status();
+	access_show_hidden_entities(true);
+	$ia = elgg_set_ignore_access(true);
+
+	$options = array(
+		'type' => 'object',
+		'subtype' => 'messages',
+		'metadata_name' => 'fromId',
+		'metadata_value' => $user->getGUID(),
+		'limit' => 0,
+	);
+	$batch = new ElggBatch('elgg_get_entities_from_metadata', $options);
+	foreach ($batch as $e) {
+		$e->delete();
+	}
+
+	elgg_set_ignore_access($ia);
+	access_show_hidden_entities($entity_disable_override);
+}
 
 /**
  * Register messages with ECML.
-- 
cgit v1.2.3


From 9a50f1530843f7fe8505b3424afcd40ad9e87900 Mon Sep 17 00:00:00 2001
From: cash 
Date: Sat, 30 Mar 2013 12:35:32 -0400
Subject: put test in to prevent deleting messages from everyone in case no
 guid - should never happen

---
 mod/messages/start.php | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mod/messages/start.php b/mod/messages/start.php
index 714129e4b..6d0e82744 100644
--- a/mod/messages/start.php
+++ b/mod/messages/start.php
@@ -437,6 +437,10 @@ function messages_user_hover_menu($hook, $type, $return, $params) {
  */
 function messages_purge($event, $type, $user) {
 
+	if (!$user->getGUID()) {
+		return;
+	}
+
 	// make sure we delete them all
 	$entity_disable_override = access_get_show_hidden_status();
 	access_show_hidden_entities(true);
-- 
cgit v1.2.3


From 62ebd8ff12086b6f7aaaba846264e43169ab5c8c Mon Sep 17 00:00:00 2001
From: cash 
Date: Sat, 30 Mar 2013 12:50:30 -0400
Subject: Fixes #4378 prevent loading same library name twice

---
 engine/lib/elgglib.php | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/engine/lib/elgglib.php b/engine/lib/elgglib.php
index a49b620ac..fb652a141 100644
--- a/engine/lib/elgglib.php
+++ b/engine/lib/elgglib.php
@@ -93,10 +93,17 @@ function elgg_register_library($name, $location) {
  * @return void
  * @throws InvalidParameterException
  * @since 1.8.0
+ * @todo return boolean in 1.9 to indicate whether the library has been loaded
  */
 function elgg_load_library($name) {
 	global $CONFIG;
 
+	static $loaded_libraries = array();
+
+	if (in_array($name, $loaded_libraries)) {
+		return;
+	}
+
 	if (!isset($CONFIG->libraries)) {
 		$CONFIG->libraries = array();
 	}
@@ -113,6 +120,8 @@ function elgg_load_library($name) {
 		);
 		throw new InvalidParameterException($error);
 	}
+
+	$loaded_libraries[] = $name;
 }
 
 /**
-- 
cgit v1.2.3


From e579d5b32ea0f12450520a6d45183018e0851757 Mon Sep 17 00:00:00 2001
From: cash 
Date: Sat, 30 Mar 2013 13:29:51 -0400
Subject: Fixes #2682 strips tags from site name

---
 actions/admin/site/update_basic.php | 2 +-
 install/ElggInstaller.php           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/actions/admin/site/update_basic.php b/actions/admin/site/update_basic.php
index 97d258b65..9765182cc 100644
--- a/actions/admin/site/update_basic.php
+++ b/actions/admin/site/update_basic.php
@@ -16,7 +16,7 @@ if ($site = elgg_get_site_entity()) {
 	}
 
 	$site->description = get_input('sitedescription');
-	$site->name = get_input('sitename');
+	$site->name = strip_tags(get_input('sitename'));
 	$site->email = get_input('siteemail');
 	$site->save();
 
diff --git a/install/ElggInstaller.php b/install/ElggInstaller.php
index 93716f7cd..78cdde90f 100644
--- a/install/ElggInstaller.php
+++ b/install/ElggInstaller.php
@@ -1414,7 +1414,7 @@ class ElggInstaller {
 		$submissionVars['wwwroot'] = sanitise_filepath($submissionVars['wwwroot']);
 
 		$site = new ElggSite();
-		$site->name = $submissionVars['sitename'];
+		$site->name = strip_tags($submissionVars['sitename']);
 		$site->url = $submissionVars['wwwroot'];
 		$site->access_id = ACCESS_PUBLIC;
 		$site->email = $submissionVars['siteemail'];
-- 
cgit v1.2.3


From bb0a69ba571744697fd89ffbc97577a734b38d2f Mon Sep 17 00:00:00 2001
From: Paweł Sroka 
Date: Sat, 30 Mar 2013 19:16:41 +0100
Subject: Fixes #5302 - Automatically registers view to simplecache on
 elgg_get_simplecache_url call

---
 engine/lib/cache.php | 1 +
 1 file changed, 1 insertion(+)

diff --git a/engine/lib/cache.php b/engine/lib/cache.php
index 59359124e..3116c1a9b 100644
--- a/engine/lib/cache.php
+++ b/engine/lib/cache.php
@@ -208,6 +208,7 @@ function elgg_get_simplecache_url($type, $view) {
 	global $CONFIG;
 	$lastcache = (int)$CONFIG->lastcache;
 	$viewtype = elgg_get_viewtype();
+	elgg_register_simplecache_view("$type/$view");// see #5302
 	if (elgg_is_simplecache_enabled()) {
 		$url = elgg_get_site_url() . "cache/$type/$viewtype/$view.$lastcache.$type";
 	} else {
-- 
cgit v1.2.3


From aa8af5035f0f7b2ed54eb42a136b58565e023c01 Mon Sep 17 00:00:00 2001
From: Sem 
Date: Sun, 8 Jul 2012 13:23:40 +0200
Subject: Fixes #1708. Moving up subpages when deleting a page wasn't being
 applied to the page_top subtype.

---
 mod/pages/actions/pages/delete.php | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/mod/pages/actions/pages/delete.php b/mod/pages/actions/pages/delete.php
index 7a314a280..fbb4cf551 100644
--- a/mod/pages/actions/pages/delete.php
+++ b/mod/pages/actions/pages/delete.php
@@ -23,6 +23,23 @@ if (elgg_instanceof($page, 'object', 'page') || elgg_instanceof($page, 'object',
 		if ($children) {
 			foreach ($children as $child) {
 				$child->parent_guid = $parent;
+				
+				// If no parent, we need to transform $child in a page_top
+				if ($parent == 0) {
+					$dbprefix = elgg_get_config('dbprefix');
+					$subtype_id = add_subtype('object', 'page_top');
+					update_data("UPDATE {$dbprefix}entities
+						set subtype='$subtype_id' WHERE guid=$child->guid");
+					
+					// If memcache is available then delete this entry from the cache
+					static $newentity_cache;
+					if ((!$newentity_cache) && (is_memcache_available())) {
+						$newentity_cache = new ElggMemcache('new_entity_cache');
+					}
+					if ($newentity_cache) {
+						$newentity_cache->delete($guid);
+					}
+				}
 			}
 		}
 		
-- 
cgit v1.2.3


From 324f4ed234638c66815fbaf2601373c869d023b2 Mon Sep 17 00:00:00 2001
From: cash 
Date: Sat, 30 Mar 2013 15:00:53 -0400
Subject: fixed the switch from page to page_top - the parent_guid metadata
 needed to be deleted

---
 mod/pages/actions/pages/delete.php | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/mod/pages/actions/pages/delete.php b/mod/pages/actions/pages/delete.php
index fbb4cf551..f6b25cd7e 100644
--- a/mod/pages/actions/pages/delete.php
+++ b/mod/pages/actions/pages/delete.php
@@ -22,22 +22,24 @@ if (elgg_instanceof($page, 'object', 'page') || elgg_instanceof($page, 'object',
 		));
 		if ($children) {
 			foreach ($children as $child) {
-				$child->parent_guid = $parent;
-				
-				// If no parent, we need to transform $child in a page_top
-				if ($parent == 0) {
-					$dbprefix = elgg_get_config('dbprefix');
-					$subtype_id = add_subtype('object', 'page_top');
-					update_data("UPDATE {$dbprefix}entities
-						set subtype='$subtype_id' WHERE guid=$child->guid");
-					
-					// If memcache is available then delete this entry from the cache
-					static $newentity_cache;
-					if ((!$newentity_cache) && (is_memcache_available())) {
+				if ($parent) {
+					$child->parent_guid = $parent;
+				} else {
+					// If no parent, we need to transform $child to a page_top
+					$db_prefix = elgg_get_config('dbprefix');
+					$subtype_id = (int)get_subtype_id('object', 'page_top');
+					$child_guid = (int)$child->guid;
+					update_data("UPDATE {$db_prefix}entities
+						SET subtype = $subtype_id WHERE guid = $child_guid");
+					elgg_delete_metadata(array(
+						'guid' => $child_guid,
+						'metadata_name' => 'parent_guid',
+					));
+
+					// If memcache is available, delete this entry from the cache
+					if (is_memcache_available()) {
 						$newentity_cache = new ElggMemcache('new_entity_cache');
-					}
-					if ($newentity_cache) {
-						$newentity_cache->delete($guid);
+						$newentity_cache->delete($child_guid);
 					}
 				}
 			}
-- 
cgit v1.2.3


From d456d462674a72c270a9a1ce4066d5318e25a07a Mon Sep 17 00:00:00 2001
From: Sem 
Date: Sun, 8 Jul 2012 15:42:39 +0200
Subject: Refs #1708. Restore subpages that disappeared because their parent
 page_top page was deleted.

---
 mod/pages/start.php               | 13 ++++++++++++
 mod/pages/upgrades/2012061800.php | 43 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 mod/pages/upgrades/2012061800.php

diff --git a/mod/pages/start.php b/mod/pages/start.php
index 8debeef24..c1183c9bf 100644
--- a/mod/pages/start.php
+++ b/mod/pages/start.php
@@ -82,6 +82,8 @@ function pages_init() {
 
 	// register ecml views to parse
 	elgg_register_plugin_hook_handler('get_views', 'ecml', 'pages_ecml_views_hook');
+	
+	elgg_register_event_handler('upgrade', 'system', 'pages_run_upgrades');
 }
 
 /**
@@ -362,3 +364,14 @@ function pages_ecml_views_hook($hook, $entity_type, $return_value, $params) {
 
 	return $return_value;
 }
+
+/**
+ * Process upgrades for the pages plugin
+ */
+function pages_run_upgrades() {
+	$path = elgg_get_plugins_path() . 'pages/upgrades/';
+	$files = elgg_get_upgrade_files($path);
+	foreach ($files as $file) {
+		include "$path{$file}";
+	}
+}
diff --git a/mod/pages/upgrades/2012061800.php b/mod/pages/upgrades/2012061800.php
new file mode 100644
index 000000000..fe39faea5
--- /dev/null
+++ b/mod/pages/upgrades/2012061800.php
@@ -0,0 +1,43 @@
+guid);
+	$dbprefix = elgg_get_config('dbprefix');
+	$subtype_id = add_subtype('object', 'page_top');
+	update_data("UPDATE {$dbprefix}entities
+		set subtype='$subtype_id' WHERE guid=$page->guid");
+	return true;
+}
+
+$previous_access = elgg_set_ignore_access(true);
+
+$dbprefix = elgg_get_config('dbprefix');
+$name_metastring_id = get_metastring_id('parent_guid');
+
+// Looking for pages without metadata (see #3046)
+$options = array(
+	'type' => 'object',
+	'subtype' => 'page',
+	'wheres' => "NOT EXISTS (
+		SELECT 1 FROM {$dbprefix}metadata md
+		WHERE md.entity_guid = e.guid
+		AND md.name_id = $name_metastring_id)"
+);
+$batch = new ElggBatch('elgg_get_entities_from_metadata', $options, 'pages_2012061800', 100);
+elgg_set_ignore_access($previous_access);
+
+if ($batch->callbackResult) {
+	error_log("Elgg Pages upgrade (2012061800) succeeded");
+} else {
+	error_log("Elgg Pages upgrade (2012061800) failed");
+}
-- 
cgit v1.2.3


From 4b19f1801d29c6441b0f2aaa08ed834df1a0e056 Mon Sep 17 00:00:00 2001
From: cash 
Date: Sat, 30 Mar 2013 15:21:51 -0400
Subject: fixed upgrade script - was skipping pages because we were changing
 the results between calls to ElggBatch

---
 mod/pages/upgrades/2012061800.php | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/mod/pages/upgrades/2012061800.php b/mod/pages/upgrades/2012061800.php
index fe39faea5..c21ccae3b 100644
--- a/mod/pages/upgrades/2012061800.php
+++ b/mod/pages/upgrades/2012061800.php
@@ -1,21 +1,26 @@
 guid);
+function pages_2012061800($page) {
 	$dbprefix = elgg_get_config('dbprefix');
-	$subtype_id = add_subtype('object', 'page_top');
+	$subtype_id = (int)get_subtype_id('object', 'page_top');
+	$page_guid = (int)$page->guid;
 	update_data("UPDATE {$dbprefix}entities
-		set subtype='$subtype_id' WHERE guid=$page->guid");
+		SET subtype = $subtype_id WHERE guid = $page_guid");
+	error_log("called");
 	return true;
 }
 
@@ -23,8 +28,11 @@ $previous_access = elgg_set_ignore_access(true);
 
 $dbprefix = elgg_get_config('dbprefix');
 $name_metastring_id = get_metastring_id('parent_guid');
+if (!$name_metastring_id) {
+	return;
+}
 
-// Looking for pages without metadata (see #3046)
+// Looking for pages without metadata
 $options = array(
 	'type' => 'object',
 	'subtype' => 'page',
@@ -33,11 +41,9 @@ $options = array(
 		WHERE md.entity_guid = e.guid
 		AND md.name_id = $name_metastring_id)"
 );
-$batch = new ElggBatch('elgg_get_entities_from_metadata', $options, 'pages_2012061800', 100);
+$batch = new ElggBatch('elgg_get_entities_from_metadata', $options, 'pages_2012061800', 50, false);
 elgg_set_ignore_access($previous_access);
 
 if ($batch->callbackResult) {
 	error_log("Elgg Pages upgrade (2012061800) succeeded");
-} else {
-	error_log("Elgg Pages upgrade (2012061800) failed");
 }
-- 
cgit v1.2.3


From 574e9aefd04a6ea75f53d20756edb6a04a31ba2b Mon Sep 17 00:00:00 2001
From: cash 
Date: Sat, 30 Mar 2013 15:46:49 -0400
Subject: Fixes #5304 handling metadata arrays in search for user profile data

---
 mod/search/search_hooks.php | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/mod/search/search_hooks.php b/mod/search/search_hooks.php
index 47351fb8a..92c6d700a 100644
--- a/mod/search/search_hooks.php
+++ b/mod/search/search_hooks.php
@@ -178,11 +178,20 @@ function search_users_hook($hook, $type, $value, $params) {
 		$entity->setVolatileData('search_matched_title', $title);
 
 		$matched = '';
-		foreach ($profile_fields as $md) {
-			$text = $entity->$md;
-			if (stristr($text, $query)) {
-				$matched .= elgg_echo("profile:{$md}") . ': '  
-						. search_get_highlighted_relevant_substrings($text, $query);
+		foreach ($profile_fields as $md_name) {
+			$metadata = $entity->$md_name;
+			if (is_array($metadata)) {
+				foreach ($metadata as $text) {
+					if (stristr($text, $query)) {
+						$matched .= elgg_echo("profile:{$md_name}") . ': '
+								. search_get_highlighted_relevant_substrings($text, $query);
+					}
+				}
+			} else {
+				if (stristr($metadata, $query)) {
+					$matched .= elgg_echo("profile:{$md_name}") . ': '
+							. search_get_highlighted_relevant_substrings($metadata, $query);
+				}
 			}
 		}
 
-- 
cgit v1.2.3


From 4aa65bc720056238870119a92a3d19049f77ebe4 Mon Sep 17 00:00:00 2001
From: cash 
Date: Sat, 30 Mar 2013 16:13:51 -0400
Subject: Fixes #5037 returns free text search and relevance ranking for
 objects and groups

---
 mod/search/search_hooks.php | 93 ++++++++++++++++++++++-----------------------
 1 file changed, 45 insertions(+), 48 deletions(-)

diff --git a/mod/search/search_hooks.php b/mod/search/search_hooks.php
index 92c6d700a..c92003c7e 100644
--- a/mod/search/search_hooks.php
+++ b/mod/search/search_hooks.php
@@ -3,17 +3,17 @@
  * Elgg core search.
  *
  * @package Elgg
- * @subpackage Core
+ * @subpackage Search
  */
 
 /**
- * Return default results for searches on objects.
+ * Get objects that match the search parameters.
  *
- * @param unknown_type $hook
- * @param unknown_type $type
- * @param unknown_type $value
- * @param unknown_type $params
- * @return unknown_type
+ * @param string $hook   Hook name
+ * @param string $type   Hook type
+ * @param array  $value  Empty array
+ * @param array  $params Search parameters
+ * @return array
  */
 function search_objects_hook($hook, $type, $value, $params) {
 
@@ -23,7 +23,7 @@ function search_objects_hook($hook, $type, $value, $params) {
 	$params['joins'] = array($join);
 	$fields = array('title', 'description');
 
-	$where = search_get_where_sql('oe', $fields, $params, FALSE);
+	$where = search_get_where_sql('oe', $fields, $params);
 
 	$params['wheres'] = array($where);
 	$params['count'] = TRUE;
@@ -54,13 +54,13 @@ function search_objects_hook($hook, $type, $value, $params) {
 }
 
 /**
- * Return default results for searches on groups.
+ * Get groups that match the search parameters.
  *
- * @param unknown_type $hook
- * @param unknown_type $type
- * @param unknown_type $value
- * @param unknown_type $params
- * @return unknown_type
+ * @param string $hook   Hook name
+ * @param string $type   Hook type
+ * @param array  $value  Empty array
+ * @param array  $params Search parameters
+ * @return array
  */
 function search_groups_hook($hook, $type, $value, $params) {
 	$db_prefix = elgg_get_config('dbprefix');
@@ -69,12 +69,9 @@ function search_groups_hook($hook, $type, $value, $params) {
 
 	$join = "JOIN {$db_prefix}groups_entity ge ON e.guid = ge.guid";
 	$params['joins'] = array($join);
-	
 	$fields = array('name', 'description');
 
-	// force into boolean mode because we've having problems with the
-	// "if > 50% match 0 sets are returns" problem.
-	$where = search_get_where_sql('ge', $fields, $params, FALSE);
+	$where = search_get_where_sql('ge', $fields, $params);
 
 	$params['wheres'] = array($where);
 
@@ -109,15 +106,15 @@ function search_groups_hook($hook, $type, $value, $params) {
 }
 
 /**
- * Return default results for searches on users.
- *
- * @todo add profile field MD searching
+ * Get users that match the search parameters.
  *
- * @param unknown_type $hook
- * @param unknown_type $type
- * @param unknown_type $value
- * @param unknown_type $params
- * @return unknown_type
+ * Searches on username, display name, and profile fields
+ * 
+ * @param string $hook   Hook name
+ * @param string $type   Hook type
+ * @param array  $value  Empty array
+ * @param array  $params Search parameters
+ * @return array
  */
 function search_users_hook($hook, $type, $value, $params) {
 	$db_prefix = elgg_get_config('dbprefix');
@@ -205,13 +202,13 @@ function search_users_hook($hook, $type, $value, $params) {
 }
 
 /**
- * Return default results for searches on tags.
+ * Get entities with tags that match the search parameters.
  *
- * @param unknown_type $hook
- * @param unknown_type $type
- * @param unknown_type $value
- * @param unknown_type $params
- * @return unknown_type
+ * @param string $hook   Hook name
+ * @param string $type   Hook type
+ * @param array  $value  Empty array
+ * @param array  $params Search parameters
+ * @return array
  */
 function search_tags_hook($hook, $type, $value, $params) {
 	$db_prefix = elgg_get_config('dbprefix');
@@ -340,11 +337,11 @@ function search_tags_hook($hook, $type, $value, $params) {
 /**
  * Register tags as a custom search type.
  *
- * @param unknown_type $hook
- * @param unknown_type $type
- * @param unknown_type $value
- * @param unknown_type $params
- * @return unknown_type
+ * @param string $hook   Hook name
+ * @param string $type   Hook type
+ * @param array  $value  Array of custom search types
+ * @param array  $params Search parameters
+ * @return array
  */
 function search_custom_types_tags_hook($hook, $type, $value, $params) {
 	$value[] = 'tags';
@@ -353,13 +350,13 @@ function search_custom_types_tags_hook($hook, $type, $value, $params) {
 
 
 /**
- * Return default results for searches on comments.
+ * Get comments that match the search parameters.
  *
- * @param unknown_type $hook
- * @param unknown_type $type
- * @param unknown_type $value
- * @param unknown_type $params
- * @return unknown_type
+ * @param string $hook   Hook name
+ * @param string $type   Hook type
+ * @param array  $value  Empty array
+ * @param array  $params Search parameters
+ * @return array
  */
 function search_comments_hook($hook, $type, $value, $params) {
 	$db_prefix = elgg_get_config('dbprefix');
@@ -469,11 +466,11 @@ function search_comments_hook($hook, $type, $value, $params) {
 /**
  * Register comments as a custom search type.
  *
- * @param unknown_type $hook
- * @param unknown_type $type
- * @param unknown_type $value
- * @param unknown_type $params
- * @return unknown_type
+ * @param string $hook   Hook name
+ * @param string $type   Hook type
+ * @param array  $value  Array of custom search types
+ * @param array  $params Search parameters
+ * @return array
  */
 function search_custom_types_comments_hook($hook, $type, $value, $params) {
 	$value[] = 'comments';
-- 
cgit v1.2.3


From 31f5e27f60f3d9e5fcb3b6b9ab01b9d64a244b87 Mon Sep 17 00:00:00 2001
From: Steve Clay 
Date: Sun, 31 Mar 2013 18:18:33 -0400
Subject: Fixes #3754: Language JS views send cache headers and support
 conditional get

---
 js/lib/languages.js            |  3 +++
 views/default/js/languages.php | 24 +++++++++++++++++++++---
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/js/lib/languages.js b/js/lib/languages.js
index 44ea56d2b..d218cbc4f 100644
--- a/js/lib/languages.js
+++ b/js/lib/languages.js
@@ -30,6 +30,9 @@ elgg.reload_all_translations = function(language) {
 	var url, options;
 	url = 'ajax/view/js/languages';
 	options = {data: {language: lang}};
+    if (elgg.config.simplecache_enabled) {
+        options.data.lc = elgg.config.lastcache;
+    }
 
 	options['success'] = function(json) {
 		elgg.add_translation(lang, json);
diff --git a/views/default/js/languages.php b/views/default/js/languages.php
index c51d7bcb2..fcf903d4b 100644
--- a/views/default/js/languages.php
+++ b/views/default/js/languages.php
@@ -1,15 +1,33 @@
 translations['en'];
+// @todo add server-side caching
+if ($lastcache) {
+	// we're relying on lastcache changes to predict language changes
+	$etag = '"' . md5("$language|$lastcache") .  '"';
+
+	header('Expires: ' . gmdate('D, d M Y H:i:s \G\M\T', strtotime("+6 months")), true);
+	header("Pragma: public", true);
+	header("Cache-Control: public", true);
+	header("ETag: $etag");
+
+	if (isset($_SERVER['HTTP_IF_NONE_MATCH']) && trim($_SERVER['HTTP_IF_NONE_MATCH']) === $etag) {
+		header("HTTP/1.1 304 Not Modified");
+		exit;
+	}
+}
+
+$all_translations = elgg_get_config('translations');
+$translations = $all_translations['en'];
 
 if ($language != 'en') {
-	$translations = array_merge($translations, $CONFIG->translations[$language]);
+	$translations = array_merge($translations, $all_translations[$language]);
 }
 
 echo json_encode($translations);
\ No newline at end of file
-- 
cgit v1.2.3


From a4874cba03660c3c2169c71c1d32e5474304d984 Mon Sep 17 00:00:00 2001
From: Steve Clay 
Date: Sun, 31 Mar 2013 20:22:53 -0400
Subject: Fixes #5297: Improve error message in cases of suspected cross-domain
 login

---
 engine/lib/actions.php | 73 ++++++++++++++++++++++++++++++++++----------------
 htaccess_dist          |  8 ++++++
 languages/en.php       |  1 +
 3 files changed, 59 insertions(+), 23 deletions(-)

diff --git a/engine/lib/actions.php b/engine/lib/actions.php
index f78ca63df..56936f582 100644
--- a/engine/lib/actions.php
+++ b/engine/lib/actions.php
@@ -74,8 +74,7 @@ function action($action, $forwarder = "") {
 	);
 
 	if (!in_array($action, $exceptions)) {
-		// All actions require a token.
-		action_gatekeeper();
+		action_gatekeeper($action);
 	}
 
 	$forwarder = str_replace(elgg_get_site_url(), "", $forwarder);
@@ -187,6 +186,26 @@ function elgg_unregister_action($action) {
 	}
 }
 
+/**
+ * Is the token timestamp within the acceptable range?
+ *
+ * @param int $ts Timestamp from the CSRF token
+ *
+ * @return bool True if the timeout is 0 or $ts lies within the timeout of the current time
+ */
+function _elgg_validate_token_timestamp($ts) {
+	$action_token_timeout = elgg_get_config('action_token_timeout');
+	// config value is expressed in hours; fall back to 2 hours when it is unset (null)
+	$timeout = ($action_token_timeout !== null) ? $action_token_timeout : 2;
+
+	$hour = 60 * 60;
+	$timeout = $timeout * $hour;
+	$now = time();
+
+	// timeout 0 accepts any timestamp; otherwise $ts must be within $timeout seconds of now, past or future
+	return ($timeout == 0 || ($ts > $now - $timeout) && ($ts < $now + $timeout));
+}
+
 /**
  * Validate an action token.
  *
@@ -205,8 +224,6 @@ function elgg_unregister_action($action) {
  * @access private
  */
 function validate_action_token($visibleerrors = TRUE, $token = NULL, $ts = NULL) {
-	global $CONFIG;
-
 	if (!$token) {
 		$token = get_input('__elgg_token');
 	}
@@ -215,29 +232,18 @@ function validate_action_token($visibleerrors = TRUE, $token = NULL, $ts = NULL)
 		$ts = get_input('__elgg_ts');
 	}
 
-	if (!isset($CONFIG->action_token_timeout)) {
-		// default to 2 hours
-		$timeout = 2;
-	} else {
-		$timeout = $CONFIG->action_token_timeout;
-	}
-
 	$session_id = session_id();
 
 	if (($token) && ($ts) && ($session_id)) {
 		// generate token, check with input and forward if invalid
-		$generated_token = generate_action_token($ts);
+		$required_token = generate_action_token($ts);
 
 		// Validate token
-		if ($token == $generated_token) {
-			$hour = 60 * 60;
-			$timeout = $timeout * $hour;
-			$now = time();
-
-			// Validate time to ensure its not crazy
-			if ($timeout == 0 || ($ts > $now - $timeout) && ($ts < $now + $timeout)) {
+		if ($token == $required_token) {
+			
+			if (_elgg_validate_token_timestamp($ts)) {
 				// We have already got this far, so unless anything
-				// else says something to the contry we assume we're ok
+				// else says something to the contrary we assume we're ok
 				$returnval = true;
 
 				$returnval = elgg_trigger_plugin_hook('action_gatekeeper:permissions:check', 'all', array(
@@ -293,12 +299,33 @@ function validate_action_token($visibleerrors = TRUE, $token = NULL, $ts = NULL)
  * This function verifies form input for security features (like a generated token),
  * and forwards if they are invalid.
  *
+ * @param string $action The action being performed
+ * 
  * @return mixed True if valid or redirects.
  * @access private
  */
-function action_gatekeeper() {
-	if (validate_action_token()) {
-		return TRUE;
+function action_gatekeeper($action) {
+	if ($action === 'login') {
+		if (validate_action_token(false)) {
+			return true;
+		}
+		
+		$token = get_input('__elgg_token');
+		$ts = (int)get_input('__elgg_ts');
+		if ($token && _elgg_validate_token_timestamp($ts)) {
+			// The tokens are present and the time looks valid: this is probably a mismatch due to the 
+			// login form being on a different domain.
+			register_error(elgg_echo('actiongatekeeper:crosssitelogin'));
+			
+			
+			forward('login', 'csrf');
+		}
+		
+		// let the validator send an appropriate msg
+		validate_action_token();
+		
+	} elseif (validate_action_token()) {
+		return true;
 	}
 
 	forward(REFERER, 'csrf');
diff --git a/htaccess_dist b/htaccess_dist
index 898fa22fb..44d129475 100644
--- a/htaccess_dist
+++ b/htaccess_dist
@@ -112,6 +112,14 @@ RewriteEngine on
 #
 #RewriteBase /
 
+
+# If your users receive the message "Sorry, logging in from a different domain is not permitted"
+# you must make sure your login form is served from the same hostname as your site pages.
+# See http://docs.elgg.org/wiki/Login_token_mismatch_error for more info.
+#
+# If you must add RewriteRules to change hostname, add them directly below (above all the others)
+
+
 # In for backwards compatibility
 RewriteRule ^pg\/([A-Za-z0-9\_\-]+)$ engine/handlers/page_handler.php?handler=$1&%{QUERY_STRING} [L]
 RewriteRule ^pg\/([A-Za-z0-9\_\-]+)\/(.*)$ engine/handlers/page_handler.php?handler=$1&page=$2&%{QUERY_STRING} [L]
diff --git a/languages/en.php b/languages/en.php
index 501855f02..a3c6cf2bf 100644
--- a/languages/en.php
+++ b/languages/en.php
@@ -1193,6 +1193,7 @@ You cannot reply to this email.",
 	'actiongatekeeper:timeerror' => 'The page you were using has expired. Please refresh and try again.',
 	'actiongatekeeper:pluginprevents' => 'A extension has prevented this form from being submitted.',
 	'actiongatekeeper:uploadexceeded' => 'The size of file(s) uploaded exceeded the limit set by your site administrator',
+	'actiongatekeeper:crosssitelogin' => "Sorry, logging in from a different domain is not permitted.",
 
 
 /**
-- 
cgit v1.2.3


From 5ccc0540fd28cc1620ffca10e3aed92319e78794 Mon Sep 17 00:00:00 2001
From: Steve Clay 
Date: Sun, 31 Mar 2013 21:09:07 -0400
Subject: Fixes #4972: More robust friendly titles implementation

---
 engine/classes/ElggTranslit.php       | 15 ++++++++++++---
 engine/lib/output.php                 |  8 +++-----
 engine/tests/regression/trac_bugs.php | 16 +++++++++-------
 3 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/engine/classes/ElggTranslit.php b/engine/classes/ElggTranslit.php
index 601965c11..4ae1d2479 100644
--- a/engine/classes/ElggTranslit.php
+++ b/engine/classes/ElggTranslit.php
@@ -49,10 +49,19 @@ class ElggTranslit {
 		// Internationalization, AND 日本語!
 		$string = self::transliterateAscii($string);
 
-		// more translation
+		// allow HTML tags in titles
+		$string = preg_replace('~<([a-zA-Z][^>]*)>~', ' $1 ', $string);
+
+		// more substitutions
+		// @todo put these somewhere else
 		$string = strtr($string, array(
-			// Euro/GBP
-			"\xE2\x82\xAC" /* € */ => 'E', "\xC2\xA3" /* £ */ => 'GBP',
+			// currency
+			"\xE2\x82\xAC" /* € */ => ' E ',
+			"\xC2\xA3" /* £ */ => ' GBP ',
+			
+			"&" => ' and ',
+			">" => ' greater than ',
+			"<" => ' less than ',
 		));
 
 		// remove all ASCII except 0-9a-zA-Z, hyphen, underscore, and whitespace
diff --git a/engine/lib/output.php b/engine/lib/output.php
index da8e1ab86..c5a04989b 100644
--- a/engine/lib/output.php
+++ b/engine/lib/output.php
@@ -284,11 +284,9 @@ function elgg_get_friendly_title($title) {
 		return $result;
 	}
 
-	// handle some special cases
-	$title = str_replace('&', 'and', $title);
-	// quotes and angle brackets stored in the database as html encoded
-	$title = htmlspecialchars_decode($title);
-
+	// titles are often stored HTML encoded
+	$title = html_entity_decode($title, ENT_QUOTES, 'UTF-8');
+	
 	$title = ElggTranslit::urlize($title);
 
 	return $title;
diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php
index 691433a41..58444dd39 100644
--- a/engine/tests/regression/trac_bugs.php
+++ b/engine/tests/regression/trac_bugs.php
@@ -206,21 +206,23 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest {
 	 */
 	public function test_friendly_title() {
 		$cases = array(
+			// acid test
+			"B&N > Amazon, OK?  'hey!' $34"
+			=> "b-and-n-greater-than-amazon-ok-bold-hey-34",
+
 			// hyphen, underscore and ASCII whitespace replaced by separator,
 			// other non-alphanumeric ASCII removed
-			"a-a_a a\na\ra\ta\va!a\"a#a\$a%a&a'a(a)a*a+a,a.a/a:a;aa?a@a[a\\a]a^a`a{a|a}a~a"
-			=> "a-a-a-a-a-a-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
-
+			"a-a_a a\na\ra\ta\va!a\"a#a\$a%a&a'a(a)a*a+a,a.a/a:a;a=a?a@a[a\\a]a^a`a{a|a}a~a"
+			=> "a-a-a-a-a-a-aaaaaaa-and-aaaaaaaaaaaaaaaaaaaaaaa",
+			
 			// separators trimmed
-			"-_ hello _-" => "hello",
+			"-_ hello _-" 
+			=> "hello",
 
 			// accents removed, lower case, other multibyte chars are URL encoded
 			"I\xC3\xB1t\xC3\xABrn\xC3\xA2ti\xC3\xB4n\xC3\xA0liz\xC3\xA6ti\xC3\xB8n, AND \xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E"
 				// Iñtërnâtiônàlizætiøn, AND 日本語
 			=> 'internationalizaetion-and-%E6%97%A5%E6%9C%AC%E8%AA%9E',
-
-			// some HTML entity replacements
-			"Me & You" => 'me-and-you',
 		);
 
 		// where available, string is converted to NFC before transliteration
-- 
cgit v1.2.3


From 095f1ca873a6569b37cfac7998109cbaf83d3af2 Mon Sep 17 00:00:00 2001
From: Jeff Tilson 
Date: Thu, 4 Apr 2013 12:44:32 -0400
Subject: Fix CSS typo preventing elgg-menu-annotation style from applying to
 annotation menus

---
 views/default/css/admin.php               | 2 +-
 views/default/css/elements/navigation.php | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/views/default/css/admin.php b/views/default/css/admin.php
index ceeac71a2..8197f29de 100644
--- a/views/default/css/admin.php
+++ b/views/default/css/admin.php
@@ -1003,7 +1003,7 @@ a.elgg-button {
 	ENTITY MENU
 *************************************** */
 
-.elgg-menu-entity, elgg-menu-annotation {
+.elgg-menu-entity, .elgg-menu-annotation {
 	float: right;
 	margin-left: 15px;
 	font-size: 90%;
diff --git a/views/default/css/elements/navigation.php b/views/default/css/elements/navigation.php
index 49e36e494..6b29e4c19 100644
--- a/views/default/css/elements/navigation.php
+++ b/views/default/css/elements/navigation.php
@@ -450,7 +450,7 @@
 	ENTITY AND ANNOTATION
 *************************************** */
 
-.elgg-menu-entity, elgg-menu-annotation {
+.elgg-menu-entity, .elgg-menu-annotation {
 	float: right;
 	margin-left: 15px;
 	font-size: 90%;
-- 
cgit v1.2.3


From 5103c706857719615102eda7cfd823b0723a1476 Mon Sep 17 00:00:00 2001
From: Jeff Tilson 
Date: Fri, 5 Apr 2013 13:35:13 -0400
Subject: Allow pages revisions to be reverted or deleted

---
 mod/pages/actions/annotations/page/delete.php     | 25 +++++++
 mod/pages/languages/en.php                        |  5 ++
 mod/pages/lib/pages.php                           |  7 +-
 mod/pages/pages/pages/edit.php                    | 13 +++-
 mod/pages/start.php                               | 79 ++++++++++++++++++++++-
 mod/pages/views/default/annotation/page.php       | 18 ++++++
 mod/pages/views/default/object/page_top.php       | 26 +++++---
 mod/pages/views/default/pages/sidebar/history.php |  1 +
 8 files changed, 160 insertions(+), 14 deletions(-)
 create mode 100644 mod/pages/actions/annotations/page/delete.php

diff --git a/mod/pages/actions/annotations/page/delete.php b/mod/pages/actions/annotations/page/delete.php
new file mode 100644
index 000000000..792b7c0bc
--- /dev/null
+++ b/mod/pages/actions/annotations/page/delete.php
@@ -0,0 +1,25 @@
+entity_guid);
+
+if ($annotation && $entity->canEdit() && $annotation->canEdit()) {
+	$annotation->delete();
+	system_message(elgg_echo("pages:revision:delete:success"));
+} else {
+	register_error(elgg_echo("pages:revision:delete:failure"));
+}
+
+forward("pages/history/{$annotation->entity_guid}");
\ No newline at end of file
diff --git a/mod/pages/languages/en.php b/mod/pages/languages/en.php
index 930676b3e..13b6ece2a 100644
--- a/mod/pages/languages/en.php
+++ b/mod/pages/languages/en.php
@@ -25,6 +25,8 @@ $english = array(
 	'pages:history' => "History",
 	'pages:view' => "View page",
 	'pages:revision' => "Revision",
+	'pages:current_revision' => "Current Revision",
+	'pages:revert' => "Revert",
 
 	'pages:navigation' => "Navigation",
 	'pages:new' => "A new page",
@@ -75,6 +77,9 @@ View and comment on the new page:
 	'pages:error:no_title' => 'You must specify a title for this page.',
 	'pages:delete:success' => 'The page was successfully deleted.',
 	'pages:delete:failure' => 'The page could not be deleted.',
+	'pages:revision:delete:success' => 'The page revision was successfully deleted.',
+	'pages:revision:delete:failure' => 'The page revision could not be deleted.',
+	'pages:revision:not_found' => 'Cannot find this revision.',
 
 	/**
 	 * Page
diff --git a/mod/pages/lib/pages.php b/mod/pages/lib/pages.php
index afe42b68f..7f90d53d8 100644
--- a/mod/pages/lib/pages.php
+++ b/mod/pages/lib/pages.php
@@ -9,7 +9,7 @@
  * @param ElggObject $page
  * @return array
  */
-function pages_prepare_form_vars($page = null, $parent_guid = 0) {
+function pages_prepare_form_vars($page = null, $parent_guid = 0, $revision = null) {
 
 	// input names => defaults
 	$values = array(
@@ -41,6 +41,11 @@ function pages_prepare_form_vars($page = null, $parent_guid = 0) {
 
 	elgg_clear_sticky_form('page');
 
+	// load the revision annotation if requested
+	if ($revision instanceof ElggAnnotation && $revision->entity_guid == $page->getGUID()) {
+		$values['description'] = $revision->value;
+	}
+
 	return $values;
 }
 
diff --git a/mod/pages/pages/pages/edit.php b/mod/pages/pages/pages/edit.php
index 1f411b94d..a925cdc55 100644
--- a/mod/pages/pages/pages/edit.php
+++ b/mod/pages/pages/pages/edit.php
@@ -8,6 +8,7 @@
 gatekeeper();
 
 $page_guid = (int)get_input('guid');
+$revision = (int)get_input('annotation_id');
 $page = get_entity($page_guid);
 if (!$page) {
 	register_error(elgg_echo('noaccess'));
@@ -28,7 +29,17 @@ elgg_push_breadcrumb(elgg_echo('edit'));
 $title = elgg_echo("pages:edit");
 
 if ($page->canEdit()) {
-	$vars = pages_prepare_form_vars($page);
+
+	if ($revision) {
+		$revision = elgg_get_annotation_from_id($revision);
+		if (!$revision || !($revision->entity_guid == $page_guid)) {
+			register_error(elgg_echo('pages:revision:not_found'));
+			forward(REFERER);
+		}
+	}
+
+	$vars = pages_prepare_form_vars($page, $page->parent_guid, $revision);
+	
 	$content = elgg_view_form('pages/edit', array(), $vars);
 } else {
 	$content = elgg_echo("pages:noaccess");
diff --git a/mod/pages/start.php b/mod/pages/start.php
index 8debeef24..780d3d9a7 100644
--- a/mod/pages/start.php
+++ b/mod/pages/start.php
@@ -28,9 +28,10 @@ function pages_init() {
 	elgg_register_annotation_url_handler('page', 'pages_revision_url');
 
 	// Register some actions
-	$action_base = elgg_get_plugins_path() . 'pages/actions/pages';
-	elgg_register_action("pages/edit", "$action_base/edit.php");
-	elgg_register_action("pages/delete", "$action_base/delete.php");
+	$action_base = elgg_get_plugins_path() . 'pages/actions';
+	elgg_register_action("pages/edit", "$action_base/pages/edit.php");
+	elgg_register_action("pages/delete", "$action_base/pages/delete.php");
+	elgg_register_action("annotations/page/delete", "$action_base/annotations/page/delete.php");
 
 	// Extend the main css view
 	elgg_extend_view('css/elgg', 'pages/css');
@@ -82,6 +83,9 @@ function pages_init() {
 
 	// register ecml views to parse
 	elgg_register_plugin_hook_handler('get_views', 'ecml', 'pages_ecml_views_hook');
+
+	// hook into annotation menu
+	elgg_register_plugin_hook_handler('register', 'menu:annotation', 'pages_annotation_menu_setup');
 }
 
 /**
@@ -362,3 +366,72 @@ function pages_ecml_views_hook($hook, $entity_type, $return_value, $params) {
 
 	return $return_value;
 }
+
+/**
+ * Adds "revert" and "delete" items to the menu of "page" revision annotations.
+ *
+ * @param string $hook   Hook name ('register')
+ * @param string $type   Hook type ('menu:annotation')
+ * @param array  $return Menu items registered so far
+ * @param array  $params Hook parameters; 'annotation' is the annotation the menu is built for
+ * @return array Menu items, with the page revision items appended when applicable
+ */
+function pages_annotation_menu_setup($hook, $type, $return, $params) {
+	$annotation = $params['annotation'];
+	/* @var ElggAnnotation $annotation */
+
+	$entity = get_entity($annotation->entity_guid);
+	// get_entity() may return a falsy value (pages/edit.php guards for it too), so test $entity first
+	if ($annotation->name == 'page' && $entity && $entity->canEdit() && $annotation->canEdit()) {
+		// Get last revision
+		$revisions = elgg_get_annotations(array(
+			'annotation_name' => 'page',
+			'limit' => 1,
+			'guid' => $annotation->entity_guid,
+			'reverse_order_by' => true,
+		));
+
+		// Check if this annotation is the last revision
+		if ($revisions) {
+			$current_revision = $revisions[0];
+			if ($current_revision == $annotation) {
+				// Don't allow any actions on last revision, just display 'current revision'
+				$options = array(
+					'name' => 'current',
+					'href' => false,
+					'text' => elgg_echo('pages:current_revision'),
+					'encode_text' => false
+				);
+				$return[] = ElggMenuItem::factory($options);
+				return $return;
+			}
+		}
+
+		// Revert: link to the edit form with this revision's id in the query string
+		$options = array(
+			'name' => 'revert',
+			'href' => elgg_http_add_url_query_elements("pages/edit/{$annotation->entity_guid}", array(
+				'annotation_id' => $annotation->id
+			)),
+			'text' => elgg_echo('pages:revert'),
+			'encode_text' => false
+		);
+		$return[] = ElggMenuItem::factory($options);
+
+		// Delete: link to the annotations/page/delete action for this revision
+		$url = elgg_http_add_url_query_elements('action/annotations/page/delete', array(
+			'annotation_id' => $annotation->id,
+		));
+
+		$options = array(
+			'name' => 'delete',
+			'href' => $url,
+			'text' => "",
+			'confirm' => elgg_echo('deleteconfirm'),
+			'encode_text' => false
+		);
+		$return[] = ElggMenuItem::factory($options);
+	}
+
+	return $return;
+}
diff --git a/mod/pages/views/default/annotation/page.php b/mod/pages/views/default/annotation/page.php
index a621b9281..ecb289092 100644
--- a/mod/pages/views/default/annotation/page.php
+++ b/mod/pages/views/default/annotation/page.php
@@ -39,4 +39,22 @@ $body = <<< HTML
 

$subtitle

HTML; +if (!elgg_in_context('widgets')) { + $menu = elgg_view_menu('annotation', array( + 'annotation' => $annotation, + 'sort_by' => 'priority', + 'class' => 'elgg-menu-hz float-alt', + )); +} + +$body = << + $menu +

$title_link

+ + $subtitle + +
+HTML; + echo elgg_view_image_block($icon, $body); \ No newline at end of file diff --git a/mod/pages/views/default/object/page_top.php b/mod/pages/views/default/object/page_top.php index 945a22eed..f35202993 100644 --- a/mod/pages/views/default/object/page_top.php +++ b/mod/pages/views/default/object/page_top.php @@ -60,18 +60,26 @@ if ($comments_count != 0 && !$revision) { $comments_link = ''; } -$metadata = elgg_view_menu('entity', array( - 'entity' => $vars['entity'], - 'handler' => 'pages', - 'sort_by' => 'priority', - 'class' => 'elgg-menu-hz', -)); - $subtitle = "$editor_text $comments_link $categories"; // do not show the metadata and controls in widget view -if (elgg_in_context('widgets') || $revision) { - $metadata = ''; +if (!elgg_in_context('widgets')) { + // If we're looking at a revision, display annotation menu + if ($revision) { + $metadata = elgg_view_menu('annotation', array( + 'annotation' => $annotation, + 'sort_by' => 'priority', + 'class' => 'elgg-menu-hz float-alt', + )); + } else { + // Regular entity menu + $metadata = elgg_view_menu('entity', array( + 'entity' => $vars['entity'], + 'handler' => 'pages', + 'sort_by' => 'priority', + 'class' => 'elgg-menu-hz', + )); + } } if ($full) { diff --git a/mod/pages/views/default/pages/sidebar/history.php b/mod/pages/views/default/pages/sidebar/history.php index 7077edb9a..e0e8ed11a 100644 --- a/mod/pages/views/default/pages/sidebar/history.php +++ b/mod/pages/views/default/pages/sidebar/history.php @@ -14,6 +14,7 @@ if ($vars['page']) { 'limit' => 20, 'reverse_order_by' => true ); + elgg_push_context('widgets'); $content = elgg_list_annotations($options); } -- cgit v1.2.3 From 7b002adf2fd383e6a0e7e4b93890720d99750282 Mon Sep 17 00:00:00 2001 From: Jeff Tilson Date: Tue, 9 Apr 2013 11:19:49 -0400 Subject: Removing redundant logged in user check from pages annotation delete action (also from the comments delete core action) --- actions/comments/delete.php | 5 ----- 
mod/pages/actions/annotations/page/delete.php | 5 ----- 2 files changed, 10 deletions(-) diff --git a/actions/comments/delete.php b/actions/comments/delete.php index f2c058ff4..c6b481da4 100644 --- a/actions/comments/delete.php +++ b/actions/comments/delete.php @@ -5,11 +5,6 @@ * @package Elgg */ -// Ensure we're logged in -if (!elgg_is_logged_in()) { - forward(); -} - // Make sure we can get the comment in question $annotation_id = (int) get_input('annotation_id'); $comment = elgg_get_annotation_from_id($annotation_id); diff --git a/mod/pages/actions/annotations/page/delete.php b/mod/pages/actions/annotations/page/delete.php index 792b7c0bc..156b516d2 100644 --- a/mod/pages/actions/annotations/page/delete.php +++ b/mod/pages/actions/annotations/page/delete.php @@ -5,11 +5,6 @@ * @package ElggPages */ -// Ensure we're logged in -if (!elgg_is_logged_in()) { - forward(); -} - // Make sure we can get the annotations and entity in question $annotation_id = (int) get_input('annotation_id'); $annotation = elgg_get_annotation_from_id($annotation_id); -- cgit v1.2.3 From bea155b1381008272eb02b74c02134d44f61767a Mon Sep 17 00:00:00 2001 From: Jeff Tilson Date: Wed, 10 Apr 2013 10:42:39 -0400 Subject: Changed blog draft noticed CSS classes --- mod/blog/views/default/forms/blog/save.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mod/blog/views/default/forms/blog/save.php b/mod/blog/views/default/forms/blog/save.php index 36fa2e0e8..f825acca1 100644 --- a/mod/blog/views/default/forms/blog/save.php +++ b/mod/blog/views/default/forms/blog/save.php @@ -10,7 +10,7 @@ $vars['entity'] = $blog; $draft_warning = $vars['draft_warning']; if ($draft_warning) { - $draft_warning = '' . $draft_warning . ''; + $draft_warning = '' . $draft_warning . 
''; } $action_buttons = ''; -- cgit v1.2.3 From cbb3c583b558288c546d477ea1f4c19d8e6eb937 Mon Sep 17 00:00:00 2001 From: Jeff Tilson Date: Thu, 11 Apr 2013 16:01:27 -0400 Subject: Register bookmarks view for ecml parsing --- mod/bookmarks/start.php | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/mod/bookmarks/start.php b/mod/bookmarks/start.php index 3846f5165..a5685388b 100644 --- a/mod/bookmarks/start.php +++ b/mod/bookmarks/start.php @@ -56,6 +56,9 @@ function bookmarks_init() { // Listen to notification events and supply a more useful message elgg_register_plugin_hook_handler('notify:entity:message', 'object', 'bookmarks_notify_message'); + // Register bookmarks view for ecml parsing + elgg_register_plugin_hook_handler('get_views', 'ecml', 'bookmarks_ecml_views_hook'); + // Register a URL handler for bookmarks elgg_register_entity_url_handler('object', 'bookmarks', 'bookmark_url'); @@ -295,3 +298,16 @@ function bookmarks_page_menu($hook, $type, $return, $params) { return $return; } + +/** + * Return bookmarks views to parse for ecml + * + * @param string $hook + * @param string $type + * @param array $return + * @param array $params + */ +function bookmarks_ecml_views_hook($hook, $type, $return, $params) { + $return['object/bookmarks'] = elgg_echo('item:object:bookmarks'); + return $return; +} -- cgit v1.2.3 From 6dac3f3be526a0fdc2aea05444ed3fe0d4e7bcf5 Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Sat, 13 Apr 2013 09:44:17 -0400 Subject: clear entity cache for the subpages that were promoted to pages --- mod/pages/actions/pages/delete.php | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mod/pages/actions/pages/delete.php b/mod/pages/actions/pages/delete.php index f6b25cd7e..c99f15fbf 100644 --- a/mod/pages/actions/pages/delete.php +++ b/mod/pages/actions/pages/delete.php @@ -21,30 +21,33 @@ if (elgg_instanceof($page, 'object', 'page') || elgg_instanceof($page, 'object', 'metadata_value' => 
$page->getGUID() )); if ($children) { + $db_prefix = elgg_get_config('dbprefix'); + $subtype_id = (int)get_subtype_id('object', 'page_top'); + $newentity_cache = is_memcache_available() ? new ElggMemcache('new_entity_cache') : null; + foreach ($children as $child) { if ($parent) { $child->parent_guid = $parent; } else { // If no parent, we need to transform $child to a page_top - $db_prefix = elgg_get_config('dbprefix'); - $subtype_id = (int)get_subtype_id('object', 'page_top'); $child_guid = (int)$child->guid; + update_data("UPDATE {$db_prefix}entities SET subtype = $subtype_id WHERE guid = $child_guid"); + elgg_delete_metadata(array( 'guid' => $child_guid, 'metadata_name' => 'parent_guid', )); - // If memcache is available, delete this entry from the cache - if (is_memcache_available()) { - $newentity_cache = new ElggMemcache('new_entity_cache'); + invalidate_cache_for_entity($child_guid); + if ($newentity_cache) { $newentity_cache->delete($child_guid); } } } } - + if ($page->delete()) { system_message(elgg_echo('pages:delete:success')); if ($parent) { -- cgit v1.2.3 From 9b526b7867a461841e4f990d4ea7945660dac18d Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Sat, 13 Apr 2013 10:01:39 -0400 Subject: Refs #5297 encourage users to try again now that they have been forwarded to the correct domain --- languages/en.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/languages/en.php b/languages/en.php index a3c6cf2bf..be86e12e6 100644 --- a/languages/en.php +++ b/languages/en.php @@ -1193,7 +1193,7 @@ You cannot reply to this email.", 'actiongatekeeper:timeerror' => 'The page you were using has expired. 
Please refresh and try again.', 'actiongatekeeper:pluginprevents' => 'A extension has prevented this form from being submitted.', 'actiongatekeeper:uploadexceeded' => 'The size of file(s) uploaded exceeded the limit set by your site administrator', - 'actiongatekeeper:crosssitelogin' => "Sorry, logging in from a different domain is not permitted.", + 'actiongatekeeper:crosssitelogin' => "Sorry, logging in from a different domain is not permitted. Please try again.", /** -- cgit v1.2.3 From c1af06ed4faa8e1b0a84d6d7efbfcd0ed4c6598f Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 13 Apr 2013 13:08:56 -0400 Subject: documentation updates --- engine/classes/ElggStaticVariableCache.php | 6 +++--- engine/lib/database.php | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/engine/classes/ElggStaticVariableCache.php b/engine/classes/ElggStaticVariableCache.php index 17d849400..9c14fdfba 100644 --- a/engine/classes/ElggStaticVariableCache.php +++ b/engine/classes/ElggStaticVariableCache.php @@ -11,7 +11,7 @@ class ElggStaticVariableCache extends ElggSharedMemoryCache { /** * The cache. * - * @var unknown_type + * @var array */ private static $__cache; @@ -22,7 +22,7 @@ class ElggStaticVariableCache extends ElggSharedMemoryCache { * memory, optionally with a given namespace (to avoid overlap). * * @param string $namespace The namespace for this cache to write to. - * @note namespaces of the same name are shared! + * @warning namespaces of the same name are shared! */ function __construct($namespace = 'default') { $this->setNamespace($namespace); @@ -80,7 +80,7 @@ class ElggStaticVariableCache extends ElggSharedMemoryCache { } /** - * This was probably meant to delete everything? 
+ * Clears the cache for a particular namespace * * @return void */ diff --git a/engine/lib/database.php b/engine/lib/database.php index 2b348366d..18235149d 100644 --- a/engine/lib/database.php +++ b/engine/lib/database.php @@ -20,6 +20,7 @@ * @warning be array this var may be an array or ElggStaticVariableCache depending on when called :( * * @global ElggStaticVariableCache|array $DB_QUERY_CACHE + * @access private */ global $DB_QUERY_CACHE; $DB_QUERY_CACHE = array(); @@ -40,6 +41,7 @@ $DB_QUERY_CACHE = array(); * * * @global array $DB_DELAYED_QUERIES + * @access private */ global $DB_DELAYED_QUERIES; $DB_DELAYED_QUERIES = array(); @@ -51,6 +53,7 @@ $DB_DELAYED_QUERIES = array(); * $dblink as $dblink[$name] => resource. Use get_db_link($name) to retrieve it. * * @global resource[] $dblink + * @access private */ global $dblink; $dblink = array(); @@ -61,6 +64,7 @@ $dblink = array(); * Each call to the database increments this counter. * * @global integer $dbcalls + * @access private */ global $dbcalls; $dbcalls = 0; -- cgit v1.2.3 From f461006377263b4a3cfe6b5f0c50f0df9e4fe35d Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 13 Apr 2013 13:15:25 -0400 Subject: removed remnants of db profiling --- engine/lib/upgrades/2009102801.php | 4 ++-- engine/lib/upgrades/2010061501.php | 4 ++-- engine/lib/upgrades/2010071001.php | 4 ++-- engine/lib/upgrades/2010071002.php | 4 ++-- engine/lib/upgrades/2011052801.php | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/engine/lib/upgrades/2009102801.php b/engine/lib/upgrades/2009102801.php index cab9a6835..b91b99d95 100644 --- a/engine/lib/upgrades/2009102801.php +++ b/engine/lib/upgrades/2009102801.php @@ -203,14 +203,14 @@ function user_file_matrix($guid) { return "$time_created/$user->guid/"; } -global $DB_QUERY_CACHE, $DB_PROFILE, $ENTITY_CACHE; +global $DB_QUERY_CACHE, $ENTITY_CACHE, $CONFIG; /** * Upgrade file locations */ $users = mysql_query("SELECT guid, username FROM {$CONFIG->dbprefix}users_entity 
WHERE username != ''"); while ($user = mysql_fetch_object($users)) { - $DB_QUERY_CACHE = $DB_PROFILE = $ENTITY_CACHE = array(); + $DB_QUERY_CACHE = $ENTITY_CACHE = array(); $to = $CONFIG->dataroot . user_file_matrix($user->guid); foreach (array('1_0', '1_1', '1_6') as $version) { diff --git a/engine/lib/upgrades/2010061501.php b/engine/lib/upgrades/2010061501.php index 9ff7d3102..b23ad0820 100644 --- a/engine/lib/upgrades/2010061501.php +++ b/engine/lib/upgrades/2010061501.php @@ -45,7 +45,7 @@ if ($dbversion < 2009100701) { } } - global $DB_QUERY_CACHE, $DB_PROFILE, $ENTITY_CACHE; + global $DB_QUERY_CACHE, $ENTITY_CACHE; /** Upgrade file locations @@ -60,7 +60,7 @@ if ($dbversion < 2009100701) { $users = mysql_query("SELECT guid, username FROM {$CONFIG->dbprefix}users_entity WHERE username != ''", $link); while ($user = mysql_fetch_object($users)) { - $DB_QUERY_CACHE = $DB_PROFILE = $ENTITY_CACHE = array(); + $DB_QUERY_CACHE = $ENTITY_CACHE = array(); $to = $CONFIG->dataroot . user_file_matrix($user->guid); foreach (array('1_0', '1_1', '1_6') as $version) { diff --git a/engine/lib/upgrades/2010071001.php b/engine/lib/upgrades/2010071001.php index 1b5d379d8..34f5a773e 100644 --- a/engine/lib/upgrades/2010071001.php +++ b/engine/lib/upgrades/2010071001.php @@ -30,11 +30,11 @@ function user_file_matrix_2010071001($guid) { $sizes = array('large', 'medium', 'small', 'tiny', 'master', 'topbar'); -global $DB_QUERY_CACHE, $DB_PROFILE, $ENTITY_CACHE, $CONFIG; +global $DB_QUERY_CACHE, $ENTITY_CACHE, $CONFIG; $users = mysql_query("SELECT guid, username FROM {$CONFIG->dbprefix}users_entity WHERE username != ''"); while ($user = mysql_fetch_object($users)) { - $DB_QUERY_CACHE = $DB_PROFILE = $ENTITY_CACHE = array(); + $DB_QUERY_CACHE = $ENTITY_CACHE = array(); $user_directory = user_file_matrix_2010071001($user->guid); if (!$user_directory) { diff --git a/engine/lib/upgrades/2010071002.php b/engine/lib/upgrades/2010071002.php index 30bd6538c..d1c74ed48 100644 --- 
a/engine/lib/upgrades/2010071002.php +++ b/engine/lib/upgrades/2010071002.php @@ -4,12 +4,12 @@ */ // loop through all users checking collections and notifications -global $DB_QUERY_CACHE, $DB_PROFILE, $ENTITY_CACHE, $CONFIG; +global $DB_QUERY_CACHE, $ENTITY_CACHE, $CONFIG; global $NOTIFICATION_HANDLERS; $users = mysql_query("SELECT guid, username FROM {$CONFIG->dbprefix}users_entity WHERE username != ''"); while ($user = mysql_fetch_object($users)) { - $DB_QUERY_CACHE = $DB_PROFILE = $ENTITY_CACHE = array(); + $DB_QUERY_CACHE = $ENTITY_CACHE = array(); $user = get_entity($user->guid); foreach ($NOTIFICATION_HANDLERS as $method => $foo) { diff --git a/engine/lib/upgrades/2011052801.php b/engine/lib/upgrades/2011052801.php index 8084bc06c..d68e0118e 100644 --- a/engine/lib/upgrades/2011052801.php +++ b/engine/lib/upgrades/2011052801.php @@ -2,7 +2,7 @@ /** * Make sure all users have the relationship member_of_site */ -global $DB_QUERY_CACHE, $DB_PROFILE, $ENTITY_CACHE, $CONFIG; +global $DB_QUERY_CACHE, $ENTITY_CACHE; $db_prefix = get_config('dbprefix'); $limit = 100; @@ -17,7 +17,7 @@ $q = "SELECT e.* FROM {$db_prefix}entities e $users = get_data($q); while ($users) { - $DB_QUERY_CACHE = $DB_PROFILE = $ENTITY_CACHE = array(); + $DB_QUERY_CACHE = $ENTITY_CACHE = array(); // do manually to not trigger any events because these aren't new users. 
foreach ($users as $user) { -- cgit v1.2.3 From 25de363c7c89e04391bea72eaef0f5913cf485c0 Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 13 Apr 2013 13:28:18 -0400 Subject: cleanup of entity caching code --- engine/classes/ElggEntity.php | 6 ++-- engine/classes/ElggGroup.php | 2 +- engine/classes/ElggObject.php | 2 +- engine/classes/ElggSite.php | 2 +- engine/classes/ElggUser.php | 2 +- engine/lib/entities.php | 59 +++++++++++--------------------------- engine/lib/river.php | 6 ++-- engine/lib/users.php | 12 ++++---- mod/pages/actions/pages/delete.php | 2 +- 9 files changed, 34 insertions(+), 59 deletions(-) diff --git a/engine/classes/ElggEntity.php b/engine/classes/ElggEntity.php index 5a63c7b15..8b3ceb551 100644 --- a/engine/classes/ElggEntity.php +++ b/engine/classes/ElggEntity.php @@ -1270,7 +1270,7 @@ abstract class ElggEntity extends ElggData implements public function save() { $guid = $this->getGUID(); if ($guid > 0) { - cache_entity($this); + _elgg_cache_entity($this); return update_entity( $guid, @@ -1320,7 +1320,7 @@ abstract class ElggEntity extends ElggData implements $this->attributes['subtype'] = get_subtype_id($this->attributes['type'], $this->attributes['subtype']); - cache_entity($this); + _elgg_cache_entity($this); return $this->attributes['guid']; } @@ -1362,7 +1362,7 @@ abstract class ElggEntity extends ElggData implements // Cache object handle if ($this->attributes['guid']) { - cache_entity($this); + _elgg_cache_entity($this); } return true; diff --git a/engine/classes/ElggGroup.php b/engine/classes/ElggGroup.php index 7ab0bfa48..61f9163d5 100644 --- a/engine/classes/ElggGroup.php +++ b/engine/classes/ElggGroup.php @@ -335,7 +335,7 @@ class ElggGroup extends ElggEntity $this->attributes = $attrs; $this->attributes['tables_loaded'] = 2; - cache_entity($this); + _elgg_cache_entity($this); return true; } diff --git a/engine/classes/ElggObject.php b/engine/classes/ElggObject.php index 3cb76ffaf..d54752dca 100644 --- a/engine/classes/ElggObject.php 
+++ b/engine/classes/ElggObject.php @@ -107,7 +107,7 @@ class ElggObject extends ElggEntity { $this->attributes = $attrs; $this->attributes['tables_loaded'] = 2; - cache_entity($this); + _elgg_cache_entity($this); return true; } diff --git a/engine/classes/ElggSite.php b/engine/classes/ElggSite.php index deba5087e..dd996fe98 100644 --- a/engine/classes/ElggSite.php +++ b/engine/classes/ElggSite.php @@ -124,7 +124,7 @@ class ElggSite extends ElggEntity { $this->attributes = $attrs; $this->attributes['tables_loaded'] = 2; - cache_entity($this); + _elgg_cache_entity($this); return true; } diff --git a/engine/classes/ElggUser.php b/engine/classes/ElggUser.php index b80065b27..6d9f10b57 100644 --- a/engine/classes/ElggUser.php +++ b/engine/classes/ElggUser.php @@ -112,7 +112,7 @@ class ElggUser extends ElggEntity $this->attributes = $attrs; $this->attributes['tables_loaded'] = 2; - cache_entity($this); + _elgg_cache_entity($this); return true; } diff --git a/engine/lib/entities.php b/engine/lib/entities.php index 156eec040..cb972b282 100644 --- a/engine/lib/entities.php +++ b/engine/lib/entities.php @@ -30,10 +30,10 @@ $SUBTYPE_CACHE = null; * * @param int $guid The entity guid * - * @return null + * @return void * @access private */ -function invalidate_cache_for_entity($guid) { +function _elgg_invalidate_cache_for_entity($guid) { global $ENTITY_CACHE; $guid = (int)$guid; @@ -50,13 +50,13 @@ function invalidate_cache_for_entity($guid) { * * @param ElggEntity $entity Entity to cache * - * @return null - * @see retrieve_cached_entity() - * @see invalidate_cache_for_entity() + * @return void + * @see _elgg_retrieve_cached_entity() + * @see _elgg_invalidate_cache_for_entity() * @access private - * TODO(evan): Use an ElggCache object + * @todo Use an ElggCache object */ -function cache_entity(ElggEntity $entity) { +function _elgg_cache_entity(ElggEntity $entity) { global $ENTITY_CACHE; // Don't cache non-plugin entities while access control is off, otherwise they could be 
@@ -66,7 +66,7 @@ function cache_entity(ElggEntity $entity) { } // Don't store too many or we'll have memory problems - // TODO(evan): Pick a less arbitrary limit + // @todo Pick a less arbitrary limit if (count($ENTITY_CACHE) > 256) { $random_guid = array_rand($ENTITY_CACHE); @@ -88,11 +88,11 @@ function cache_entity(ElggEntity $entity) { * @param int $guid The guid * * @return ElggEntity|bool false if entity not cached, or not fully loaded - * @see cache_entity() - * @see invalidate_cache_for_entity() + * @see _elgg_cache_entity() + * @see _elgg_invalidate_cache_for_entity() * @access private */ -function retrieve_cached_entity($guid) { +function _elgg_retrieve_cached_entity($guid) { global $ENTITY_CACHE; if (isset($ENTITY_CACHE[$guid])) { @@ -104,31 +104,6 @@ function retrieve_cached_entity($guid) { return false; } -/** - * As retrieve_cached_entity, but returns the result as a stdClass - * (compatible with load functions that expect a database row.) - * - * @param int $guid The guid - * - * @return mixed - * @todo unused - * @access private - */ -function retrieve_cached_entity_row($guid) { - $obj = retrieve_cached_entity($guid); - if ($obj) { - $tmp = new stdClass; - - foreach ($obj as $k => $v) { - $tmp->$k = $v; - } - - return $tmp; - } - - return false; -} - /** * Return the id for a given subtype. 
* @@ -745,7 +720,7 @@ function get_entity($guid) { } // Check local cache first - $new_entity = retrieve_cached_entity($guid); + $new_entity = _elgg_retrieve_cached_entity($guid); if ($new_entity) { return $new_entity; } @@ -782,7 +757,7 @@ function get_entity($guid) { } if ($new_entity) { - cache_entity($new_entity); + _elgg_cache_entity($new_entity); } return $new_entity; } @@ -1037,7 +1012,7 @@ function elgg_get_entities(array $options = array()) { foreach ($dt as $item) { // A custom callback could result in items that aren't ElggEntity's, so check for them if ($item instanceof ElggEntity) { - cache_entity($item); + _elgg_cache_entity($item); // plugins usually have only settings if (!$item instanceof ElggPlugin) { $guids[] = $item->guid; @@ -1102,7 +1077,7 @@ function _elgg_fetch_entities_from_sql($sql) { if (empty($row->guid) || empty($row->type)) { throw new LogicException('Entity row missing guid or type'); } - if ($entity = retrieve_cached_entity($row->guid)) { + if ($entity = _elgg_retrieve_cached_entity($row->guid)) { $rows[$i] = $entity; continue; } @@ -1628,7 +1603,7 @@ function disable_entity($guid, $reason = "", $recursive = true) { $entity->disableMetadata(); $entity->disableAnnotations(); - invalidate_cache_for_entity($guid); + _elgg_invalidate_cache_for_entity($guid); $res = update_data("UPDATE {$CONFIG->dbprefix}entities SET enabled = 'no' @@ -1726,7 +1701,7 @@ function delete_entity($guid, $recursive = true) { // delete cache if (isset($ENTITY_CACHE[$guid])) { - invalidate_cache_for_entity($guid); + _elgg_invalidate_cache_for_entity($guid); } // If memcache is available then delete this entry from the cache diff --git a/engine/lib/river.php b/engine/lib/river.php index f2ec1e101..4926a85c4 100644 --- a/engine/lib/river.php +++ b/engine/lib/river.php @@ -380,10 +380,10 @@ function _elgg_prefetch_river_entities(array $river_items) { // prefetch objects and subjects $guids = array(); foreach ($river_items as $item) { - if ($item->subject_guid && 
!retrieve_cached_entity($item->subject_guid)) { + if ($item->subject_guid && !_elgg_retrieve_cached_entity($item->subject_guid)) { $guids[$item->subject_guid] = true; } - if ($item->object_guid && !retrieve_cached_entity($item->object_guid)) { + if ($item->object_guid && !_elgg_retrieve_cached_entity($item->object_guid)) { $guids[$item->object_guid] = true; } } @@ -402,7 +402,7 @@ function _elgg_prefetch_river_entities(array $river_items) { $guids = array(); foreach ($river_items as $item) { $object = $item->getObjectEntity(); - if ($object->container_guid && !retrieve_cached_entity($object->container_guid)) { + if ($object->container_guid && !_elgg_retrieve_cached_entity($object->container_guid)) { $guids[$object->container_guid] = true; } } diff --git a/engine/lib/users.php b/engine/lib/users.php index 4a585c07f..868cd7815 100644 --- a/engine/lib/users.php +++ b/engine/lib/users.php @@ -237,7 +237,7 @@ function make_user_admin($user_guid) { } $r = update_data("UPDATE {$CONFIG->dbprefix}users_entity set admin='yes' where guid=$user_guid"); - invalidate_cache_for_entity($user_guid); + _elgg_invalidate_cache_for_entity($user_guid); return $r; } @@ -273,7 +273,7 @@ function remove_user_admin($user_guid) { } $r = update_data("UPDATE {$CONFIG->dbprefix}users_entity set admin='no' where guid=$user_guid"); - invalidate_cache_for_entity($user_guid); + _elgg_invalidate_cache_for_entity($user_guid); return $r; } @@ -558,8 +558,8 @@ function get_user_by_username($username) { // Caching if ((isset($USERNAME_TO_GUID_MAP_CACHE[$username])) - && (retrieve_cached_entity($USERNAME_TO_GUID_MAP_CACHE[$username]))) { - return retrieve_cached_entity($USERNAME_TO_GUID_MAP_CACHE[$username]); + && (_elgg_retrieve_cached_entity($USERNAME_TO_GUID_MAP_CACHE[$username]))) { + return _elgg_retrieve_cached_entity($USERNAME_TO_GUID_MAP_CACHE[$username]); } $query = "SELECT e.* from {$CONFIG->dbprefix}users_entity u @@ -592,9 +592,9 @@ function get_user_by_code($code) { // Caching if 
((isset($CODE_TO_GUID_MAP_CACHE[$code])) - && (retrieve_cached_entity($CODE_TO_GUID_MAP_CACHE[$code]))) { + && (_elgg_retrieve_cached_entity($CODE_TO_GUID_MAP_CACHE[$code]))) { - return retrieve_cached_entity($CODE_TO_GUID_MAP_CACHE[$code]); + return _elgg_retrieve_cached_entity($CODE_TO_GUID_MAP_CACHE[$code]); } $query = "SELECT e.* from {$CONFIG->dbprefix}users_entity u diff --git a/mod/pages/actions/pages/delete.php b/mod/pages/actions/pages/delete.php index c99f15fbf..fd5791e4d 100644 --- a/mod/pages/actions/pages/delete.php +++ b/mod/pages/actions/pages/delete.php @@ -40,7 +40,7 @@ if (elgg_instanceof($page, 'object', 'page') || elgg_instanceof($page, 'object', 'metadata_name' => 'parent_guid', )); - invalidate_cache_for_entity($child_guid); + _elgg_invalidate_cache_for_entity($child_guid); if ($newentity_cache) { $newentity_cache->delete($child_guid); } -- cgit v1.2.3 From 5dd3d179f43b47231dab5dab5d5d9482a9810ef7 Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 13 Apr 2013 13:36:56 -0400 Subject: removed query cache reset from unit test of acl. 
If this is needed, is bug in elgg core --- engine/tests/api/access_collections.php | 3 --- 1 file changed, 3 deletions(-) diff --git a/engine/tests/api/access_collections.php b/engine/tests/api/access_collections.php index ebcd7d318..4acfae596 100644 --- a/engine/tests/api/access_collections.php +++ b/engine/tests/api/access_collections.php @@ -54,7 +54,6 @@ class ElggCoreAccessCollectionsTest extends ElggCoreUnitTest { } public function testCreateGetDeleteACL() { - global $DB_QUERY_CACHE; $acl_name = 'test access collection'; $acl_id = create_access_collection($acl_name); @@ -67,8 +66,6 @@ class ElggCoreAccessCollectionsTest extends ElggCoreUnitTest { $this->assertEqual($acl->id, $acl_id); if ($acl) { - $DB_QUERY_CACHE = array(); - $this->assertEqual($acl->name, $acl_name); $result = delete_access_collection($acl_id); -- cgit v1.2.3 From 726007e5730f83340ea8ab294a9f5951586f42fc Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 13 Apr 2013 14:10:02 -0400 Subject: introduces _elgg_invalidate_query_cache() to dry up the db query cache code --- engine/classes/ElggBatch.php | 5 +-- engine/lib/database.php | 38 +++++++++------------- engine/lib/upgrade.php | 16 ++++----- engine/lib/upgrades/2009102801.php | 5 +-- engine/lib/upgrades/2010061501.php | 6 ++-- engine/lib/upgrades/2010071001.php | 5 +-- engine/lib/upgrades/2010071002.php | 5 +-- engine/lib/upgrades/2011052801.php | 5 +-- ....8.13-update_user_location-8999eb8bf1bdd9a3.php | 4 +-- 9 files changed, 42 insertions(+), 47 deletions(-) diff --git a/engine/classes/ElggBatch.php b/engine/classes/ElggBatch.php index 5d59425d0..1912f89a2 100644 --- a/engine/classes/ElggBatch.php +++ b/engine/classes/ElggBatch.php @@ -229,8 +229,9 @@ class ElggBatch private function getNextResultsChunk() { // reset memory caches after first chunk load if ($this->chunkIndex > 0) { - global $DB_QUERY_CACHE, $ENTITY_CACHE; - $DB_QUERY_CACHE = $ENTITY_CACHE = array(); + global $ENTITY_CACHE; + $ENTITY_CACHE = array(); + 
_elgg_invalidate_query_cache(); } // always reset results. diff --git a/engine/lib/database.php b/engine/lib/database.php index 18235149d..b41eb4cda 100644 --- a/engine/lib/database.php +++ b/engine/lib/database.php @@ -460,19 +460,12 @@ function elgg_query_runner($query, $callback = null, $single = false) { * @access private */ function insert_data($query) { - global $DB_QUERY_CACHE; elgg_log("DB query $query", 'NOTICE'); $dblink = get_db_link('write'); - // Invalidate query cache - if ($DB_QUERY_CACHE) { - /* @var ElggStaticVariableCache $DB_QUERY_CACHE */ - $DB_QUERY_CACHE->clear(); - } - - elgg_log("Query cache invalidated", 'NOTICE'); + _elgg_invalidate_query_cache(); if (execute_query("$query", $dblink)) { return mysql_insert_id($dblink); @@ -492,18 +485,12 @@ function insert_data($query) { * @access private */ function update_data($query) { - global $DB_QUERY_CACHE; elgg_log("DB query $query", 'NOTICE'); $dblink = get_db_link('write'); - // Invalidate query cache - if ($DB_QUERY_CACHE) { - /* @var ElggStaticVariableCache $DB_QUERY_CACHE */ - $DB_QUERY_CACHE->clear(); - elgg_log("Query cache invalidated", 'NOTICE'); - } + _elgg_invalidate_query_cache(); if (execute_query("$query", $dblink)) { return TRUE; @@ -523,18 +510,12 @@ function update_data($query) { * @access private */ function delete_data($query) { - global $DB_QUERY_CACHE; elgg_log("DB query $query", 'NOTICE'); $dblink = get_db_link('write'); - // Invalidate query cache - if ($DB_QUERY_CACHE) { - /* @var ElggStaticVariableCache $DB_QUERY_CACHE */ - $DB_QUERY_CACHE->clear(); - elgg_log("Query cache invalidated", 'NOTICE'); - } + _elgg_invalidate_query_cache(); if (execute_query("$query", $dblink)) { return mysql_affected_rows($dblink); @@ -543,6 +524,19 @@ function delete_data($query) { return FALSE; } +/** + * Invalidate the query cache + * + * @access private + */ +function _elgg_invalidate_query_cache() { + global $DB_QUERY_CACHE; + if ($DB_QUERY_CACHE) { + /* @var ElggStaticVariableCache 
$DB_QUERY_CACHE */ + $DB_QUERY_CACHE->clear(); + elgg_log("Query cache invalidated", 'NOTICE'); + } +} /** * Return tables matching the database prefix {@link $CONFIG->dbprefix}% in the currently diff --git a/engine/lib/upgrade.php b/engine/lib/upgrade.php index d684af862..0cc1e64dc 100644 --- a/engine/lib/upgrade.php +++ b/engine/lib/upgrade.php @@ -354,16 +354,12 @@ function _elgg_upgrade_unlock() { * @access private */ function _elgg_upgrade_is_locked() { - global $CONFIG, $DB_QUERY_CACHE; - + global $CONFIG; + $is_locked = count(get_data("show tables like '{$CONFIG->dbprefix}upgrade_lock'")); - - // Invalidate query cache - if ($DB_QUERY_CACHE) { - /* @var ElggStaticVariableCache $DB_QUERY_CACHE */ - $DB_QUERY_CACHE->clear(); - elgg_log("Query cache invalidated", 'NOTICE'); - } - + + // @todo why? + _elgg_invalidate_query_cache(); + return $is_locked; } diff --git a/engine/lib/upgrades/2009102801.php b/engine/lib/upgrades/2009102801.php index b91b99d95..3ad113fb2 100644 --- a/engine/lib/upgrades/2009102801.php +++ b/engine/lib/upgrades/2009102801.php @@ -203,14 +203,15 @@ function user_file_matrix($guid) { return "$time_created/$user->guid/"; } -global $DB_QUERY_CACHE, $ENTITY_CACHE, $CONFIG; +global $ENTITY_CACHE, $CONFIG; /** * Upgrade file locations */ $users = mysql_query("SELECT guid, username FROM {$CONFIG->dbprefix}users_entity WHERE username != ''"); while ($user = mysql_fetch_object($users)) { - $DB_QUERY_CACHE = $ENTITY_CACHE = array(); + $ENTITY_CACHE = array(); + _elgg_invalidate_query_cache(); $to = $CONFIG->dataroot . 
user_file_matrix($user->guid); foreach (array('1_0', '1_1', '1_6') as $version) { diff --git a/engine/lib/upgrades/2010061501.php b/engine/lib/upgrades/2010061501.php index b23ad0820..744c28fd5 100644 --- a/engine/lib/upgrades/2010061501.php +++ b/engine/lib/upgrades/2010061501.php @@ -45,7 +45,7 @@ if ($dbversion < 2009100701) { } } - global $DB_QUERY_CACHE, $ENTITY_CACHE; + global $ENTITY_CACHE; /** Upgrade file locations @@ -60,7 +60,9 @@ if ($dbversion < 2009100701) { $users = mysql_query("SELECT guid, username FROM {$CONFIG->dbprefix}users_entity WHERE username != ''", $link); while ($user = mysql_fetch_object($users)) { - $DB_QUERY_CACHE = $ENTITY_CACHE = array(); + $ENTITY_CACHE = array(); + _elgg_invalidate_query_cache(); + $to = $CONFIG->dataroot . user_file_matrix($user->guid); foreach (array('1_0', '1_1', '1_6') as $version) { diff --git a/engine/lib/upgrades/2010071001.php b/engine/lib/upgrades/2010071001.php index 34f5a773e..5594493a8 100644 --- a/engine/lib/upgrades/2010071001.php +++ b/engine/lib/upgrades/2010071001.php @@ -30,11 +30,12 @@ function user_file_matrix_2010071001($guid) { $sizes = array('large', 'medium', 'small', 'tiny', 'master', 'topbar'); -global $DB_QUERY_CACHE, $ENTITY_CACHE, $CONFIG; +global $ENTITY_CACHE, $CONFIG; $users = mysql_query("SELECT guid, username FROM {$CONFIG->dbprefix}users_entity WHERE username != ''"); while ($user = mysql_fetch_object($users)) { - $DB_QUERY_CACHE = $ENTITY_CACHE = array(); + $ENTITY_CACHE = array(); + _elgg_invalidate_query_cache(); $user_directory = user_file_matrix_2010071001($user->guid); if (!$user_directory) { diff --git a/engine/lib/upgrades/2010071002.php b/engine/lib/upgrades/2010071002.php index d1c74ed48..52aa15ef5 100644 --- a/engine/lib/upgrades/2010071002.php +++ b/engine/lib/upgrades/2010071002.php @@ -4,12 +4,13 @@ */ // loop through all users checking collections and notifications -global $DB_QUERY_CACHE, $ENTITY_CACHE, $CONFIG; +global $ENTITY_CACHE, $CONFIG; global 
$NOTIFICATION_HANDLERS; $users = mysql_query("SELECT guid, username FROM {$CONFIG->dbprefix}users_entity WHERE username != ''"); while ($user = mysql_fetch_object($users)) { - $DB_QUERY_CACHE = $ENTITY_CACHE = array(); + $ENTITY_CACHE = array(); + _elgg_invalidate_query_cache(); $user = get_entity($user->guid); foreach ($NOTIFICATION_HANDLERS as $method => $foo) { diff --git a/engine/lib/upgrades/2011052801.php b/engine/lib/upgrades/2011052801.php index d68e0118e..b5a8e1018 100644 --- a/engine/lib/upgrades/2011052801.php +++ b/engine/lib/upgrades/2011052801.php @@ -2,7 +2,7 @@ /** * Make sure all users have the relationship member_of_site */ -global $DB_QUERY_CACHE, $ENTITY_CACHE; +global $ENTITY_CACHE; $db_prefix = get_config('dbprefix'); $limit = 100; @@ -17,7 +17,8 @@ $q = "SELECT e.* FROM {$db_prefix}entities e $users = get_data($q); while ($users) { - $DB_QUERY_CACHE = $ENTITY_CACHE = array(); + $ENTITY_CACHE = array(); + _elgg_invalidate_query_cache(); // do manually to not trigger any events because these aren't new users. foreach ($users as $user) { diff --git a/engine/lib/upgrades/2013030600-1.8.13-update_user_location-8999eb8bf1bdd9a3.php b/engine/lib/upgrades/2013030600-1.8.13-update_user_location-8999eb8bf1bdd9a3.php index b38eb5100..8eccf05e2 100644 --- a/engine/lib/upgrades/2013030600-1.8.13-update_user_location-8999eb8bf1bdd9a3.php +++ b/engine/lib/upgrades/2013030600-1.8.13-update_user_location-8999eb8bf1bdd9a3.php @@ -7,8 +7,6 @@ * This script turns that back into a string. 
*/ -global $DB_QUERY_CACHE; - $ia = elgg_set_ignore_access(true); $options = array( 'type' => 'user', @@ -17,7 +15,7 @@ $options = array( $batch = new ElggBatch('elgg_get_entities', $options); foreach ($batch as $entity) { - $DB_QUERY_CACHE = array(); + _elgg_invalidate_query_cache(); if (is_array($entity->location)) { $entity->location = implode(', ', $entity->location); -- cgit v1.2.3 From 53b226d0e3d81c0cf45b2b39b506275a7a149ebe Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 13 Apr 2013 14:28:01 -0400 Subject: adds LRU cache --- engine/classes/ElggLRUCache.php | 132 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 engine/classes/ElggLRUCache.php diff --git a/engine/classes/ElggLRUCache.php b/engine/classes/ElggLRUCache.php new file mode 100644 index 000000000..add78ecb4 --- /dev/null +++ b/engine/classes/ElggLRUCache.php @@ -0,0 +1,132 @@ +maximumSize = $size; + } + + /** + * Get the value cached with this key + * + * @param int|string $key The key. Strings that are ints are cast to ints. + * @param mixed $default The value to be returned if key not found. (Optional) + * @return mixed + */ + public function get($key, $default = null) { + if (isset($this->data[$key])) { + $this->recordAccess($key); + return $this->data[$key]; + } else { + return $default; + } + } + + /** + * Put something in the cache + * + * @param int|string $key The key. Strings that are ints are cast to ints. 
+ * @param mixed $value The value to cache + * @return void + */ + public function put($key, $value) { + if (isset($this->data[$key])) { + $this->data[$key] = $value; + $this->recordAccess($key); + } else { + $this->data[$key] = $value; + if ($this->size() > $this->maximumSize) { + // remove least recently used element (front of array) + reset($this->data); + unset($this->data[key($this->data)]); + } + } + } + + /** + * Get the number of elements in the cache + * + * @return int + */ + public function size() { + return count($this->data); + } + + /** + * Does the cache contain an element with this key + * + * @param int|string $key The key + * @return boolean + */ + public function containsKey($key) { + return isset($this->data[$key]); + } + + /** + * Remove the element with this key. + * + * @param int|string $key The key + * @return mixed Value or null if not set + */ + public function remove($key) { + if (isset($this->data[$key])) { + $value = $this->data[$key]; + unset($this->data[$key]); + return $value; + } else { + return null; + } + } + + /** + * Clear the cache + * + * @return void + */ + public function clear() { + $this->data = array(); + } + + /** + * Moves the element from current position to end of array + * + * @param int|string $key The key + * @return void + */ + protected function recordAccess($key) { + $value = $this->data[$key]; + unset($this->data[$key]); + $this->data[$key] = $value; + } +} -- cgit v1.2.3 From fc9a1e985b0b20c74a913f22ec9ea7e0c16848a7 Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 13 Apr 2013 14:35:24 -0400 Subject: updated lru cache so easier to integrate --- engine/classes/ElggLRUCache.php | 55 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/engine/classes/ElggLRUCache.php b/engine/classes/ElggLRUCache.php index add78ecb4..90e63bb61 100644 --- a/engine/classes/ElggLRUCache.php +++ b/engine/classes/ElggLRUCache.php @@ -13,7 +13,7 @@ * @package Elgg.Core * @subpackage 
Cache */ -class ElggLRUCache { +class ElggLRUCache implements ArrayAccess { /** @var int */ protected $maximumSize; @@ -54,13 +54,13 @@ class ElggLRUCache { } /** - * Put something in the cache + * Add something to the cache * * @param int|string $key The key. Strings that are ints are cast to ints. * @param mixed $value The value to cache * @return void */ - public function put($key, $value) { + public function set($key, $value) { if (isset($this->data[$key])) { $this->data[$key] = $value; $this->recordAccess($key); @@ -129,4 +129,53 @@ class ElggLRUCache { unset($this->data[$key]); $this->data[$key] = $value; } + + /** + * Assigns a value for the specified key + * + * @see ArrayAccess::offsetSet() + * + * @param int|string $key The key to assign the value to. + * @param mixed $value The value to set. + * @return void + */ + function offsetSet($key, $value) { + $this->set($key, $value); + } + + /** + * Get the value for specified key + * + * @see ArrayAccess::offsetGet() + * + * @param int|string $key The key to retrieve. + * @return mixed + */ + function offsetGet($key) { + return $this->get($key); + } + + /** + * Unsets a key. + * + * @see ArrayAccess::offsetUnset() + * + * @param int|string $key The key to unset. + * @return void + */ + function offsetUnset($key) { + $this->remove($key); + } + + /** + * Does key exist? + * + * @see ArrayAccess::offsetExists() + * + * @param int|string $key A key to check for. 
+ * @return boolean + */ + function offsetExists($key) { + return $this->containsKey($key); + } } -- cgit v1.2.3 From 21c92cf39d719a913cd9e29474c04781e9cd72ef Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 13 Apr 2013 15:05:40 -0400 Subject: Fixes #4978 integrates LRU cache for db query cache --- engine/classes/ElggLRUCache.php | 8 ++++---- engine/lib/database.php | 29 +++++++++++++++-------------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/engine/classes/ElggLRUCache.php b/engine/classes/ElggLRUCache.php index 90e63bb61..f51af2ed7 100644 --- a/engine/classes/ElggLRUCache.php +++ b/engine/classes/ElggLRUCache.php @@ -139,7 +139,7 @@ class ElggLRUCache implements ArrayAccess { * @param mixed $value The value to set. * @return void */ - function offsetSet($key, $value) { + public function offsetSet($key, $value) { $this->set($key, $value); } @@ -151,7 +151,7 @@ class ElggLRUCache implements ArrayAccess { * @param int|string $key The key to retrieve. * @return mixed */ - function offsetGet($key) { + public function offsetGet($key) { return $this->get($key); } @@ -163,7 +163,7 @@ class ElggLRUCache implements ArrayAccess { * @param int|string $key The key to unset. * @return void */ - function offsetUnset($key) { + public function offsetUnset($key) { $this->remove($key); } @@ -175,7 +175,7 @@ class ElggLRUCache implements ArrayAccess { * @param int|string $key A key to check for. * @return boolean */ - function offsetExists($key) { + public function offsetExists($key) { return $this->containsKey($key); } } diff --git a/engine/lib/database.php b/engine/lib/database.php index b41eb4cda..3553d787d 100644 --- a/engine/lib/database.php +++ b/engine/lib/database.php @@ -12,18 +12,19 @@ /** * Query cache for all queries. * - * Each query and its results are stored in this array as: + * Each query and its results are stored in this cache as: * - * $DB_QUERY_CACHE[$query] => array(result1, result2, ... 
resultN) + * $DB_QUERY_CACHE[query hash] => array(result1, result2, ... resultN) * + * @see elgg_query_runner() for details on the hash. * - * @warning be array this var may be an array or ElggStaticVariableCache depending on when called :( + * @warning Elgg used to set this as an empty array to turn off the cache * - * @global ElggStaticVariableCache|array $DB_QUERY_CACHE + * @global ElggLRUCache|null $DB_QUERY_CACHE * @access private */ global $DB_QUERY_CACHE; -$DB_QUERY_CACHE = array(); +$DB_QUERY_CACHE = null; /** * Queries to be executed upon shutdown. @@ -127,9 +128,8 @@ function establish_db_link($dblinkname = "readwrite") { // Set up cache if global not initialized and query cache not turned off if ((!$DB_QUERY_CACHE) && (!$db_cache_off)) { - // @todo everywhere else this is assigned to array(), making it dangerous to call - // object methods on this. We should consider making this an plain array - $DB_QUERY_CACHE = new ElggStaticVariableCache('db_query_cache'); + // @todo if we keep this cache in 1.9, expose the size as a config parameter + $DB_QUERY_CACHE = new ElggLRUCache(200); } } @@ -404,11 +404,9 @@ function elgg_query_runner($query, $callback = null, $single = false) { // Is cached? 
if ($DB_QUERY_CACHE) { - $cached_query = $DB_QUERY_CACHE[$hash]; - - if ($cached_query !== FALSE) { + if (isset($DB_QUERY_CACHE[$hash])) { elgg_log("DB query $query results returned from cache (hash: $hash)", 'NOTICE'); - return $cached_query; + return $DB_QUERY_CACHE[$hash]; } } @@ -531,10 +529,13 @@ function delete_data($query) { */ function _elgg_invalidate_query_cache() { global $DB_QUERY_CACHE; - if ($DB_QUERY_CACHE) { - /* @var ElggStaticVariableCache $DB_QUERY_CACHE */ + if ($DB_QUERY_CACHE instanceof ElggLRUCache) { $DB_QUERY_CACHE->clear(); elgg_log("Query cache invalidated", 'NOTICE'); + } elseif ($DB_QUERY_CACHE) { + // In case someone sets the cache to an array and primes it with data + $DB_QUERY_CACHE = array(); + elgg_log("Query cache invalidated", 'NOTICE'); } } -- cgit v1.2.3 From ee23c308b4c09f449d7639e36e922d7864e91aa1 Mon Sep 17 00:00:00 2001 From: cash Date: Sat, 13 Apr 2013 17:56:31 -0400 Subject: Fixes #5301 Removes twitter widget - no longer supported by core developers --- mod/twitter/graphics/thewire_speech_bubble.gif | Bin 560 -> 0 bytes mod/twitter/graphics/twitter16px.png | Bin 724 -> 0 bytes mod/twitter/languages/en.php | 17 ------ mod/twitter/manifest.xml | 16 ------ mod/twitter/start.php | 14 ----- mod/twitter/views/default/twitter/css.php | 63 --------------------- .../views/default/widgets/twitter/content.php | 42 -------------- mod/twitter/views/default/widgets/twitter/edit.php | 24 -------- 8 files changed, 176 deletions(-) delete mode 100644 mod/twitter/graphics/thewire_speech_bubble.gif delete mode 100644 mod/twitter/graphics/twitter16px.png delete mode 100644 mod/twitter/languages/en.php delete mode 100644 mod/twitter/manifest.xml delete mode 100644 mod/twitter/start.php delete mode 100644 mod/twitter/views/default/twitter/css.php delete mode 100644 mod/twitter/views/default/widgets/twitter/content.php delete mode 100644 mod/twitter/views/default/widgets/twitter/edit.php diff --git 
a/mod/twitter/graphics/thewire_speech_bubble.gif b/mod/twitter/graphics/thewire_speech_bubble.gif deleted file mode 100644 index d0e8606a1..000000000 Binary files a/mod/twitter/graphics/thewire_speech_bubble.gif and /dev/null differ diff --git a/mod/twitter/graphics/twitter16px.png b/mod/twitter/graphics/twitter16px.png deleted file mode 100644 index de51c6953..000000000 Binary files a/mod/twitter/graphics/twitter16px.png and /dev/null differ diff --git a/mod/twitter/languages/en.php b/mod/twitter/languages/en.php deleted file mode 100644 index 11e745ba1..000000000 --- a/mod/twitter/languages/en.php +++ /dev/null @@ -1,17 +0,0 @@ - 'Twitter', - 'twitter:info' => 'Display your latest tweets', - 'twitter:username' => 'Your twitter username', - 'twitter:num' => 'Number of tweets to show*', - 'twitter:visit' => 'visit my twitter', - 'twitter:notset' => 'This widget needs to be configured. To display your latest tweets, click the customize icon and fill in your Twitter username.', - 'twitter:invalid' => 'This widget is configured with an invalid Twitter username. 
Click the customize icon to correct it.', - 'twitter:apibug' => "*Due to a bug in the Twitter 1.0 API, you may see fewer tweets than you ask for.", -); - -add_translation("en", $english); diff --git a/mod/twitter/manifest.xml b/mod/twitter/manifest.xml deleted file mode 100644 index 18fa8c957..000000000 --- a/mod/twitter/manifest.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - Twitter Widget - Core developers - 1.7 - bundled - widget - Elgg simple twitter widget - http://www.elgg.org/ - See COPYRIGHT.txt - GNU General Public License version 2 - - elgg_release - 1.8 - - diff --git a/mod/twitter/start.php b/mod/twitter/start.php deleted file mode 100644 index b793eadf0..000000000 --- a/mod/twitter/start.php +++ /dev/null @@ -1,14 +0,0 @@ - - -#twitter_widget { - margin:0 10px 0 10px; -} -#twitter_widget ul { - margin:0; - padding:0; -} -#twitter_widget li { - list-style-image:none; - list-style-position:outside; - list-style-type:none; - margin:0 0 5px 0; - padding:0; - overflow-x: hidden; - border: 2px solid #dedede; - -webkit-border-radius: 8px; - -moz-border-radius: 8px; - border-radius: 8px; -} -#twitter_widget li span { - color:#666666; - background:white; - - -webkit-border-radius: 8px; - -moz-border-radius: 8px; - border-radius: 8px; - - padding:5px; - display:block; -} -p.visit_twitter a { - background:url(mod/twitter/graphics/twitter16px.png) left no-repeat; - padding:0 0 0 20px; - margin:0; -} -p.twitter_username .input-text { - width:200px; -} -.visit_twitter { - background:white; - - -webkit-border-radius: 8px; - -moz-border-radius: 8px; - border-radius: 8px; - - padding:2px; - margin:0 0 5px 0; -} -#twitter_widget li > a { - display:block; - margin:0 0 0 4px; -} -#twitter_widget li span a { - display:inline !important; -} \ No newline at end of file diff --git a/mod/twitter/views/default/widgets/twitter/content.php b/mod/twitter/views/default/widgets/twitter/content.php deleted file mode 100644 index caefd369a..000000000 --- 
a/mod/twitter/views/default/widgets/twitter/content.php +++ /dev/null @@ -1,42 +0,0 @@ -twitter_username; - -if (empty($username)) { - echo "

" . elgg_echo("twitter:notset") . "

"; - return; -} - -$username_is_valid = preg_match('~^[a-zA-Z0-9_]{1,20}$~', $username); -if (!$username_is_valid) { - echo "

" . elgg_echo("twitter:invalid") . "

"; - return; -} - - -$num = $vars['entity']->twitter_num; -if (empty($num)) { - $num = 5; -} - -// @todo upgrade to 1.1 API https://dev.twitter.com/docs/api/1.1/get/statuses/home_timeline -$script_url = "https://api.twitter.com/1/statuses/user_timeline/" . urlencode($username) . ".json" - . "?callback=twitterCallback2&count=" . (int) $num; - -?> -
-
    -

    elgg_echo("twitter:visit"), - 'href' => 'http://twitter.com/' . urlencode($username), - 'is_trusted' => true, - )) ?>

    - - -
    diff --git a/mod/twitter/views/default/widgets/twitter/edit.php b/mod/twitter/views/default/widgets/twitter/edit.php deleted file mode 100644 index c3fc6f0d5..000000000 --- a/mod/twitter/views/default/widgets/twitter/edit.php +++ /dev/null @@ -1,24 +0,0 @@ - -
    - - 'params[twitter_username]', - 'value' => $vars['entity']->twitter_username, - )) ?> -
    -
    - - 'params[twitter_num]', - 'value' => $vars['entity']->twitter_num, - )) ?> - -
    \ No newline at end of file -- cgit v1.2.3 From 999f23c13e8c34f4cbae4af409446d037043d798 Mon Sep 17 00:00:00 2001 From: cash Date: Sun, 14 Apr 2013 19:32:03 -0400 Subject: Refs #5335 added unit test for underscore in relative path for JS url normalization --- js/tests/ElggLibTest.js | 1 + 1 file changed, 1 insertion(+) diff --git a/js/tests/ElggLibTest.js b/js/tests/ElggLibTest.js index 2a676e22a..31b561923 100644 --- a/js/tests/ElggLibTest.js +++ b/js/tests/ElggLibTest.js @@ -78,6 +78,7 @@ ElggLibTest.prototype.testNormalizeUrl = function() { ['https://example.com', 'https://example.com'], ['http://example-time.com', 'http://example-time.com'], ['//example.com', '//example.com'], + ['mod/my_plugin/graphics/image.jpg', elgg.config.wwwroot + 'mod/my_plugin/graphics/image.jpg'], ['ftp://example.com/file', 'ftp://example.com/file'], ['mailto:brett@elgg.org', 'mailto:brett@elgg.org'], -- cgit v1.2.3 From 037cce80328318f7151c498d48f983cabd886505 Mon Sep 17 00:00:00 2001 From: cash Date: Sun, 14 Apr 2013 19:59:30 -0400 Subject: entity and query cache are size limited so don't need cache clearing in ElggBatch --- engine/classes/ElggBatch.php | 6 ------ 1 file changed, 6 deletions(-) diff --git a/engine/classes/ElggBatch.php b/engine/classes/ElggBatch.php index 1912f89a2..eb93b0f5d 100644 --- a/engine/classes/ElggBatch.php +++ b/engine/classes/ElggBatch.php @@ -227,12 +227,6 @@ class ElggBatch * @return bool */ private function getNextResultsChunk() { - // reset memory caches after first chunk load - if ($this->chunkIndex > 0) { - global $ENTITY_CACHE; - $ENTITY_CACHE = array(); - _elgg_invalidate_query_cache(); - } // always reset results. 
$this->results = array(); -- cgit v1.2.3 From 6470154396269707e95c6dbbf40157f533928059 Mon Sep 17 00:00:00 2001 From: cash Date: Sun, 14 Apr 2013 20:16:47 -0400 Subject: Fixes #5358 updates text on twitter login --- mod/twitter_api/languages/en.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mod/twitter_api/languages/en.php b/mod/twitter_api/languages/en.php index f4b3c7f94..c19a058aa 100644 --- a/mod/twitter_api/languages/en.php +++ b/mod/twitter_api/languages/en.php @@ -25,7 +25,7 @@ $english = array( 'twitter_api:revoke:success' => 'Twitter access has been revoked.', - 'twitter_api:login' => 'Allow existing users who have connected their Twitter account to sign in with Twitter?', + 'twitter_api:login' => 'Allow users to sign in with Twitter?', 'twitter_api:new_users' => 'Allow new users to sign up using their Twitter account even if user registration is disabled?', 'twitter_api:login:success' => 'You have been logged in.', 'twitter_api:login:error' => 'Unable to login with Twitter.', -- cgit v1.2.3 From 97e797427da01fffe3c85b17f41427469b33ca32 Mon Sep 17 00:00:00 2001 From: cash Date: Sun, 14 Apr 2013 20:35:48 -0400 Subject: updated twitteroauth library to latest from github: https://github.com/abraham/twitteroauth/commit/61f5a550cd3643c619e84cb914ef1f22e491c270 (note: now conflicts with oauth_api library) --- mod/twitter_api/manifest.xml | 10 +- mod/twitter_api/vendors/twitteroauth/OAuth.php | 390 ++++++++++++++++++++- mod/twitter_api/vendors/twitteroauth/README | 117 ++++++- .../vendors/twitteroauth/twitterOAuth.php | 16 +- 4 files changed, 495 insertions(+), 38 deletions(-) diff --git a/mod/twitter_api/manifest.xml b/mod/twitter_api/manifest.xml index 86bba4b50..3af866bba 100644 --- a/mod/twitter_api/manifest.xml +++ b/mod/twitter_api/manifest.xml @@ -2,7 +2,7 @@ Twitter API Core developers - 1.8 + 1.8.15 Allows users to authenticate their Elgg account with Twitter. 
api bundled @@ -13,15 +13,15 @@ elgg_release 1.8 - - plugin - oauth_api - php_extension curl + + plugin + oauth_api + plugin twitterservice diff --git a/mod/twitter_api/vendors/twitteroauth/OAuth.php b/mod/twitter_api/vendors/twitteroauth/OAuth.php index e132a5bc8..e76304146 100644 --- a/mod/twitter_api/vendors/twitteroauth/OAuth.php +++ b/mod/twitter_api/vendors/twitteroauth/OAuth.php @@ -1,6 +1,12 @@ build_signature($request, $consumer, $token); + return $built == $signature; + } +} + +/** + * The HMAC-SHA1 signature method uses the HMAC-SHA1 signature algorithm as defined in [RFC2104] + * where the Signature Base String is the text and the key is the concatenated values (each first + * encoded per Parameter Encoding) of the Consumer Secret and Token Secret, separated by an '&' + * character (ASCII code 38) even if empty. + * - Chapter 9.2 ("HMAC-SHA1") + */ +class OAuthSignatureMethod_HMAC_SHA1 extends OAuthSignatureMethod { + function get_name() { return "HMAC-SHA1"; - }/*}}}*/ + } - public function build_signature($request, $consumer, $token) {/*{{{*/ + public function build_signature($request, $consumer, $token) { $base_string = $request->get_signature_base_string(); $request->base_string = $base_string; @@ -63,16 +113,111 @@ class twitterOAuthSignatureMethod_HMAC_SHA1 extends OAuthSignatureMethod_HMAC_SH $key_parts = OAuthUtil::urlencode_rfc3986($key_parts); $key = implode('&', $key_parts); - return base64_encode( hash_hmac('sha1', $base_string, $key, true)); - }/*}}}*/ + return base64_encode(hash_hmac('sha1', $base_string, $key, true)); + } +} - public function check_signature(&$request, $consumer, $token, $signature) { - $built = $this->build_signature($request, $consumer, $token); - return $built == $signature; +/** + * The PLAINTEXT method does not provide any security protection and SHOULD only be used + * over a secure channel such as HTTPS. It does not use the Signature Base String. 
+ * - Chapter 9.4 ("PLAINTEXT") + */ +class OAuthSignatureMethod_PLAINTEXT extends OAuthSignatureMethod { + public function get_name() { + return "PLAINTEXT"; + } + + /** + * oauth_signature is set to the concatenated encoded values of the Consumer Secret and + * Token Secret, separated by a '&' character (ASCII code 38), even if either secret is + * empty. The result MUST be encoded again. + * - Chapter 9.4.1 ("Generating Signatures") + * + * Please note that the second encoding MUST NOT happen in the SignatureMethod, as + * OAuthRequest handles this! + */ + public function build_signature($request, $consumer, $token) { + $key_parts = array( + $consumer->secret, + ($token) ? $token->secret : "" + ); + + $key_parts = OAuthUtil::urlencode_rfc3986($key_parts); + $key = implode('&', $key_parts); + $request->base_string = $key; + + return $key; } -}/*}}}*/ +} + +/** + * The RSA-SHA1 signature method uses the RSASSA-PKCS1-v1_5 signature algorithm as defined in + * [RFC3447] section 8.2 (more simply known as PKCS#1), using SHA-1 as the hash function for + * EMSA-PKCS1-v1_5. It is assumed that the Consumer has provided its RSA public key in a + * verified way to the Service Provider, in a manner which is beyond the scope of this + * specification. + * - Chapter 9.3 ("RSA-SHA1") + */ +abstract class OAuthSignatureMethod_RSA_SHA1 extends OAuthSignatureMethod { + public function get_name() { + return "RSA-SHA1"; + } + + // Up to the SP to implement this lookup of keys. Possible ideas are: + // (1) do a lookup in a table of trusted certs keyed off of consumer + // (2) fetch via http using a url provided by the requester + // (3) some sort of specific discovery code based on request + // + // Either way should return a string representation of the certificate + protected abstract function fetch_public_cert(&$request); + + // Up to the SP to implement this lookup of keys. 
Possible ideas are: + // (1) do a lookup in a table of trusted certs keyed off of consumer + // + // Either way should return a string representation of the certificate + protected abstract function fetch_private_cert(&$request); + + public function build_signature($request, $consumer, $token) { + $base_string = $request->get_signature_base_string(); + $request->base_string = $base_string; + + // Fetch the private key cert based on the request + $cert = $this->fetch_private_cert($request); + + // Pull the private key ID from the certificate + $privatekeyid = openssl_get_privatekey($cert); + + // Sign using the key + $ok = openssl_sign($base_string, $signature, $privatekeyid); + + // Release the key resource + openssl_free_key($privatekeyid); + + return base64_encode($signature); + } + + public function check_signature($request, $consumer, $token, $signature) { + $decoded_sig = base64_decode($signature); + + $base_string = $request->get_signature_base_string(); + + // Fetch the public key cert based on the request + $cert = $this->fetch_public_cert($request); + + // Pull the public key ID from the certificate + $publickeyid = openssl_get_publickey($cert); -class twitterOAuthRequest extends OAuthRequest { + // Check the computed signature against the one passed in the query + $ok = openssl_verify($base_string, $decoded_sig, $publickeyid); + + // Release the key resource + openssl_free_key($publickeyid); + + return $ok == 1; + } +} + +class OAuthRequest { private $parameters; private $http_method; private $http_url; @@ -138,7 +283,7 @@ class twitterOAuthRequest extends OAuthRequest { } - return new twitterOAuthRequest($http_method, $http_url, $parameters); + return new OAuthRequest($http_method, $http_url, $parameters); } /** @@ -146,16 +291,16 @@ class twitterOAuthRequest extends OAuthRequest { */ public static function from_consumer_and_token($consumer, $token, $http_method, $http_url, $parameters=NULL) { @$parameters or $parameters = array(); - $defaults = 
array("oauth_version" => twitterOAuthRequest::$version, - "oauth_nonce" => twitterOAuthRequest::generate_nonce(), - "oauth_timestamp" => twitterOAuthRequest::generate_timestamp(), + $defaults = array("oauth_version" => OAuthRequest::$version, + "oauth_nonce" => OAuthRequest::generate_nonce(), + "oauth_timestamp" => OAuthRequest::generate_timestamp(), "oauth_consumer_key" => $consumer->key); if ($token) $defaults['oauth_token'] = $token->key; $parameters = array_merge($defaults, $parameters); - return new twitterOAuthRequest($http_method, $http_url, $parameters); + return new OAuthRequest($http_method, $http_url, $parameters); } public function set_parameter($name, $value, $allow_duplicates = true) { @@ -333,6 +478,217 @@ class twitterOAuthRequest extends OAuthRequest { } } +class OAuthServer { + protected $timestamp_threshold = 300; // in seconds, five minutes + protected $version = '1.0'; // hi blaine + protected $signature_methods = array(); + + protected $data_store; + + function __construct($data_store) { + $this->data_store = $data_store; + } + + public function add_signature_method($signature_method) { + $this->signature_methods[$signature_method->get_name()] = + $signature_method; + } + + // high level functions + + /** + * process a request_token request + * returns the request token on success + */ + public function fetch_request_token(&$request) { + $this->get_version($request); + + $consumer = $this->get_consumer($request); + + // no token required for the initial token request + $token = NULL; + + $this->check_signature($request, $consumer, $token); + + // Rev A change + $callback = $request->get_parameter('oauth_callback'); + $new_token = $this->data_store->new_request_token($consumer, $callback); + + return $new_token; + } + + /** + * process an access_token request + * returns the access token on success + */ + public function fetch_access_token(&$request) { + $this->get_version($request); + + $consumer = $this->get_consumer($request); + + // 
requires authorized request token + $token = $this->get_token($request, $consumer, "request"); + + $this->check_signature($request, $consumer, $token); + + // Rev A change + $verifier = $request->get_parameter('oauth_verifier'); + $new_token = $this->data_store->new_access_token($token, $consumer, $verifier); + + return $new_token; + } + + /** + * verify an api call, checks all the parameters + */ + public function verify_request(&$request) { + $this->get_version($request); + $consumer = $this->get_consumer($request); + $token = $this->get_token($request, $consumer, "access"); + $this->check_signature($request, $consumer, $token); + return array($consumer, $token); + } + + // Internals from here + /** + * version 1 + */ + private function get_version(&$request) { + $version = $request->get_parameter("oauth_version"); + if (!$version) { + // Service Providers MUST assume the protocol version to be 1.0 if this parameter is not present. + // Chapter 7.0 ("Accessing Protected Ressources") + $version = '1.0'; + } + if ($version !== $this->version) { + throw new OAuthException("OAuth version '$version' not supported"); + } + return $version; + } + + /** + * figure out the signature with some defaults + */ + private function get_signature_method(&$request) { + $signature_method = + @$request->get_parameter("oauth_signature_method"); + + if (!$signature_method) { + // According to chapter 7 ("Accessing Protected Ressources") the signature-method + // parameter is required, and we can't just fallback to PLAINTEXT + throw new OAuthException('No signature method parameter. This parameter is required'); + } + + if (!in_array($signature_method, + array_keys($this->signature_methods))) { + throw new OAuthException( + "Signature method '$signature_method' not supported " . + "try one of the following: " . 
+ implode(", ", array_keys($this->signature_methods)) + ); + } + return $this->signature_methods[$signature_method]; + } + + /** + * try to find the consumer for the provided request's consumer key + */ + private function get_consumer(&$request) { + $consumer_key = @$request->get_parameter("oauth_consumer_key"); + if (!$consumer_key) { + throw new OAuthException("Invalid consumer key"); + } + + $consumer = $this->data_store->lookup_consumer($consumer_key); + if (!$consumer) { + throw new OAuthException("Invalid consumer"); + } + + return $consumer; + } + + /** + * try to find the token for the provided request's token key + */ + private function get_token(&$request, $consumer, $token_type="access") { + $token_field = @$request->get_parameter('oauth_token'); + $token = $this->data_store->lookup_token( + $consumer, $token_type, $token_field + ); + if (!$token) { + throw new OAuthException("Invalid $token_type token: $token_field"); + } + return $token; + } + + /** + * all-in-one function to check the signature on a request + * should guess the signature method appropriately + */ + private function check_signature(&$request, $consumer, $token) { + // this should probably be in a different method + $timestamp = @$request->get_parameter('oauth_timestamp'); + $nonce = @$request->get_parameter('oauth_nonce'); + + $this->check_timestamp($timestamp); + $this->check_nonce($consumer, $token, $nonce, $timestamp); + + $signature_method = $this->get_signature_method($request); + + $signature = $request->get_parameter('oauth_signature'); + $valid_sig = $signature_method->check_signature( + $request, + $consumer, + $token, + $signature + ); + + if (!$valid_sig) { + throw new OAuthException("Invalid signature"); + } + } + + /** + * check that the timestamp is new enough + */ + private function check_timestamp($timestamp) { + if( ! $timestamp ) + throw new OAuthException( + 'Missing timestamp parameter. 
The parameter is required' + ); + + // verify that timestamp is recentish + $now = time(); + if (abs($now - $timestamp) > $this->timestamp_threshold) { + throw new OAuthException( + "Expired timestamp, yours $timestamp, ours $now" + ); + } + } + + /** + * check that the nonce is not repeated + */ + private function check_nonce($consumer, $token, $nonce, $timestamp) { + if( ! $nonce ) + throw new OAuthException( + 'Missing nonce parameter. The parameter is required' + ); + + // verify that the nonce is uniqueish + $found = $this->data_store->lookup_nonce( + $consumer, + $token, + $nonce, + $timestamp + ); + if ($found) { + throw new OAuthException("Nonce already used: $nonce"); + } + } + +} + class OAuthDataStore { function lookup_consumer($consumer_key) { // implement me @@ -514,5 +870,3 @@ class OAuthUtil { return implode('&', $pairs); } } - -?> diff --git a/mod/twitter_api/vendors/twitteroauth/README b/mod/twitter_api/vendors/twitteroauth/README index 33cb91f21..c9a17ce4b 100644 --- a/mod/twitter_api/vendors/twitteroauth/README +++ b/mod/twitter_api/vendors/twitteroauth/README @@ -1,7 +1,114 @@ -Abraham Williams | abraham@poseurte.ch | http://abrah.am | @abraham +TwitterOAuth +------------ -The first PHP library for working with Twitter's OAuth API. +PHP library for working with Twitter's OAuth API. -Documentation: http://wiki.github.com/abraham/twitteroauth/documentation -Source: http://github.com/abraham/twitteroauth -Twitter: http://apiwiki.twitter.com +Flow Overview +============= + +1. Build TwitterOAuth object using client credentials. +2. Request temporary credentials from Twitter. +3. Build authorize URL for Twitter. +4. Redirect user to authorize URL. +5. User authorizes access and returns from Twitter. +6. Rebuild TwitterOAuth object with client credentials and temporary credentials. +7. Get token credentials from Twitter. +8. Rebuild TwitterOAuth object with client credentials and token credentials. +9. Query Twitter API. 
+ +Terminology +=========== + +The terminology has changed since 0.1.x to better match the draft-hammer-oauth IETF +RFC. You can read that at http://tools.ietf.org/html/draft-hammer-oauth. Some of the +terms will differ from those Twitter uses as well. + +client credentials - Consumer key/secret you get when registering an app with Twitter. +temporary credentials - Previously known as the request token. +token credentials - Previously known as the access token. + +Parameters +========== + +There are a number of parameters you can modify after creating a TwitterOAuth object. + +Switch an existing TwitterOAuth install to use version 1.1 of the API. + + $connection->$host = "https://api.twitter.com/1.1/"; + +Custom useragent. + + $connection->useragent = 'Custom useragent string'; + +Verify Twitters SSL certificate. + + $connection->ssl_verifypeer = TRUE; + +There are several more you can find in TwitterOAuth.php. + +Extended flow using example code +================================ + +To use TwitterOAuth with the Twitter API you need *TwitterOAuth.php*, *OAuth.php* and +client credentials. You can get client credentials by registering your application at +[dev.twitter.com/apps](https://dev.twitter.com/apps). + +Users start out on connect.php which displays the "Sign in with Twitter" image hyperlinked +to redirect.php. This button should be displayed on your homepage in your login section. The +client credentials are saved in config.php as `CONSUMER_KEY` and `CONSUMER_SECRET`. You can +save a static callback URL in the app settings page, in the config file or use a dynamic +callback URL later in step 2. In example use https://example.com/callback.php. + +1) When a user lands on redirect.php we build a new TwitterOAuth object using the client credentials. +If you have your own configuration method feel free to use it instead of config.php. 
+ + $connection = new TwitterOAuth(CONSUMER_KEY, CONSUMER_SECRET); // Use config.php client credentials + $connection = new TwitterOAuth('abc890', '123xyz'); + +2) Using the built $connection object you will ask Twitter for temporary credentials. The `oauth_callback` value is required. + + $temporary_credentials = $connection->getRequestToken(OAUTH_CALLBACK); // Use config.php callback URL. + +3) Now that we have temporary credentials the user has to go to Twitter and authorize the app +to access and updates their data. You can also pass a second parameter of FALSE to not use [Sign +in with Twitter](https://dev.twitter.com/docs/auth/sign-twitter). + + $redirect_url = $connection->getAuthorizeURL($temporary_credentials); // Use Sign in with Twitter + $redirect_url = $connection->getAuthorizeURL($temporary_credentials, FALSE); + +4) You will now have a Twitter URL that you must send the user to. + + https://api.twitter.com/oauth/authenticate?oauth_token=xyz123 + +5) The user is now on twitter.com and may have to login. Once authenticated with Twitter they will +will either have to click on allow/deny, or will be automatically redirected back to the callback. + +6) Now that the user has returned to callback.php and allowed access we need to build a new +TwitterOAuth object using the temporary credentials. + + $connection = new TwitterOAuth(CONSUMER_KEY, CONSUMER_SECRET, $_SESSION['oauth_token'], + $_SESSION['oauth_token_secret']); + +7) Now we ask Twitter for long lasting token credentials. These are specific to the application +and user and will act like password to make future requests. Normally the token credentials would +get saved in your database but for this example we are just using sessions. + + $token_credentials = $connection->getAccessToken($_REQUEST['oauth_verifier']); + +8) With the token credentials we build a new TwitterOAuth object. 
+ + $connection = new TwitterOAuth(CONSUMER_KEY, CONSUMER_SECRET, $token_credentials['oauth_token'], + $token_credentials['oauth_token_secret']); + +9) And finally we can make requests authenticated as the user. You can GET, POST, and DELETE API +methods. Directly copy the path from the API documentation and add an array of any parameter +you wish to include for the API method such as curser or in_reply_to_status_id. + + $account = $connection->get('account/verify_credentials'); + $status = $connection->post('statuses/update', array('status' => 'Text of status here', 'in_reply_to_status_id' => 123456)); + $status = $connection->delete('statuses/destroy/12345'); + +Contributors +============ + +* [Abraham Williams](https://twitter.com/abraham) - Main developer, current maintainer. diff --git a/mod/twitter_api/vendors/twitteroauth/twitterOAuth.php b/mod/twitter_api/vendors/twitteroauth/twitterOAuth.php index f36e6158d..4c2447c46 100644 --- a/mod/twitter_api/vendors/twitteroauth/twitterOAuth.php +++ b/mod/twitter_api/vendors/twitteroauth/twitterOAuth.php @@ -57,7 +57,7 @@ class TwitterOAuth { * construct TwitterOAuth object */ function __construct($consumer_key, $consumer_secret, $oauth_token = NULL, $oauth_token_secret = NULL) { - $this->sha1_method = new twitterOAuthSignatureMethod_HMAC_SHA1(); + $this->sha1_method = new OAuthSignatureMethod_HMAC_SHA1(); $this->consumer = new OAuthConsumer($consumer_key, $consumer_secret); if (!empty($oauth_token) && !empty($oauth_token_secret)) { $this->token = new OAuthConsumer($oauth_token, $oauth_token_secret); @@ -72,11 +72,9 @@ class TwitterOAuth { * * @returns a key/value array containing oauth_token and oauth_token_secret */ - function getRequestToken($oauth_callback = NULL) { + function getRequestToken($oauth_callback) { $parameters = array(); - if (!empty($oauth_callback)) { - $parameters['oauth_callback'] = $oauth_callback; - } + $parameters['oauth_callback'] = $oauth_callback; $request = 
$this->oAuthRequest($this->requestTokenURL(), 'GET', $parameters); $token = OAuthUtil::parse_parameters($request); $this->token = new OAuthConsumer($token['oauth_token'], $token['oauth_token_secret']); @@ -108,11 +106,9 @@ class TwitterOAuth { * "user_id" => "9436992", * "screen_name" => "abraham") */ - function getAccessToken($oauth_verifier = FALSE) { + function getAccessToken($oauth_verifier) { $parameters = array(); - if (!empty($oauth_verifier)) { - $parameters['oauth_verifier'] = $oauth_verifier; - } + $parameters['oauth_verifier'] = $oauth_verifier; $request = $this->oAuthRequest($this->accessTokenURL(), 'GET', $parameters); $token = OAuthUtil::parse_parameters($request); $this->token = new OAuthConsumer($token['oauth_token'], $token['oauth_token_secret']); @@ -179,7 +175,7 @@ class TwitterOAuth { if (strrpos($url, 'https://') !== 0 && strrpos($url, 'http://') !== 0) { $url = "{$this->host}{$url}.{$this->format}"; } - $request = twitterOAuthRequest::from_consumer_and_token($this->consumer, $this->token, $method, $url, $parameters); + $request = OAuthRequest::from_consumer_and_token($this->consumer, $this->token, $method, $url, $parameters); $request->sign_request($this->sha1_method, $this->consumer, $this->token); switch ($method) { case 'GET': -- cgit v1.2.3 From e96a2e9b8ec370a53900a82847502cee763277d3 Mon Sep 17 00:00:00 2001 From: cash Date: Sun, 14 Apr 2013 20:58:45 -0400 Subject: centralize the creation of the api object --- mod/twitter_api/lib/twitter_api.php | 32 +++++++++++++++++++++----------- mod/twitter_api/start.php | 27 ++++++++++----------------- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/mod/twitter_api/lib/twitter_api.php b/mod/twitter_api/lib/twitter_api.php index e163d2b3e..1299232c0 100644 --- a/mod/twitter_api/lib/twitter_api.php +++ b/mod/twitter_api/lib/twitter_api.php @@ -5,6 +5,24 @@ * @package twitter_api */ +/** + * Get the API wrapper object + * + * @param string $oauth_token User's OAuth token + * @param 
string $oauth_token_secret User's OAuth secret + * @return TwitterOAuth|null + */ +function twitter_api_get_api_object($oauth_token = null, $oauth_token_secret = null) { + $consumer_key = elgg_get_plugin_setting('consumer_key', 'twitter_api'); + $consumer_secret = elgg_get_plugin_setting('consumer_secret', 'twitter_api'); + if (!($consumer_key && $consumer_secret)) { + return null; + } + + $api = new TwitterOAuth($consumer_key, $consumer_secret, $oauth_token, $oauth_token_secret); + return $api; +} + /** * Tests if the system admin has enabled Sign-On-With-Twitter * @@ -121,9 +139,7 @@ function twitter_api_login() { forward(); } } else { - $consumer_key = elgg_get_plugin_setting('consumer_key', 'twitter_api'); - $consumer_secret = elgg_get_plugin_setting('consumer_secret', 'twitter_api'); - $api = new TwitterOAuth($consumer_key, $consumer_secret, $token['oauth_token'], $token['oauth_token_secret']); + $api = twitter_api_get_api_object($token['oauth_token'], $token['oauth_token_secret']); $twitter = $api->get('account/verify_credentials'); // backward compatibility for deprecated Twitter Login plugin @@ -314,11 +330,8 @@ function twitter_api_revoke() { function twitter_api_get_authorize_url($callback = NULL, $login = true) { global $SESSION; - $consumer_key = elgg_get_plugin_setting('consumer_key', 'twitter_api'); - $consumer_secret = elgg_get_plugin_setting('consumer_secret', 'twitter_api'); - // request tokens from Twitter - $twitter = new TwitterOAuth($consumer_key, $consumer_secret); + $twitter = twitter_api_get_api_object(); $token = $twitter->getRequestToken($callback); // save token in session for use after authorization @@ -340,16 +353,13 @@ function twitter_api_get_access_token($oauth_verifier = FALSE) { /* @var ElggSession $SESSION */ global $SESSION; - $consumer_key = elgg_get_plugin_setting('consumer_key', 'twitter_api'); - $consumer_secret = elgg_get_plugin_setting('consumer_secret', 'twitter_api'); - // retrieve stored tokens $oauth_token = 
$SESSION['twitter_api']['oauth_token']; $oauth_token_secret = $SESSION['twitter_api']['oauth_token_secret']; unset($SESSION['twitter_api']); // fetch an access token - $api = new TwitterOAuth($consumer_key, $consumer_secret, $oauth_token, $oauth_token_secret); + $api = twitter_api_get_api_object($oauth_token, $oauth_token_secret); return $api->getAccessToken($oauth_verifier); } diff --git a/mod/twitter_api/start.php b/mod/twitter_api/start.php index e6221de6b..e3e866c1f 100644 --- a/mod/twitter_api/start.php +++ b/mod/twitter_api/start.php @@ -115,13 +115,6 @@ function twitter_api_tweet($hook, $type, $returnvalue, $params) { // @todo - allow admin to select origins? - // check admin settings - $consumer_key = elgg_get_plugin_setting('consumer_key', 'twitter_api'); - $consumer_secret = elgg_get_plugin_setting('consumer_secret', 'twitter_api'); - if (!($consumer_key && $consumer_secret)) { - return; - } - // check user settings $user_id = $params['user']->getGUID(); $access_key = elgg_get_plugin_user_setting('access_key', $user_id, 'twitter_api'); @@ -130,8 +123,11 @@ function twitter_api_tweet($hook, $type, $returnvalue, $params) { return; } - // send tweet - $api = new TwitterOAuth($consumer_key, $consumer_secret, $access_key, $access_secret); + $api = twitter_api_get_api_object($access_key, $access_secret); + if (!$api) { + return; + } + $api->post('statuses/update', array('status' => $params['message'])); } @@ -143,12 +139,6 @@ function twitter_api_tweet($hook, $type, $returnvalue, $params) { * @return array */ function twitter_api_fetch_tweets($user_guid, $options = array()) { - // check admin settings - $consumer_key = elgg_get_plugin_setting('consumer_key', 'twitter_api'); - $consumer_secret = elgg_get_plugin_setting('consumer_secret', 'twitter_api'); - if (!($consumer_key && $consumer_secret)) { - return FALSE; - } // check user settings $access_key = elgg_get_plugin_user_setting('access_key', $user_guid, 'twitter_api'); @@ -157,8 +147,11 @@ function 
twitter_api_fetch_tweets($user_guid, $options = array()) { return FALSE; } - // fetch tweets - $api = new TwitterOAuth($consumer_key, $consumer_secret, $access_key, $access_secret); + $api = twitter_api_get_api_object($access_key, $access_secret); + if (!$api) { + return FALSE; + } + return $api->get('statuses/user_timeline', $options); } -- cgit v1.2.3 From 53c0783cdf59d7320233b79b7bb2b019bdfbd33a Mon Sep 17 00:00:00 2001 From: cash Date: Sun, 14 Apr 2013 21:03:55 -0400 Subject: Fixes #4917 Using twitter API v1.1 --- mod/twitter_api/lib/twitter_api.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mod/twitter_api/lib/twitter_api.php b/mod/twitter_api/lib/twitter_api.php index 1299232c0..8cb1b885e 100644 --- a/mod/twitter_api/lib/twitter_api.php +++ b/mod/twitter_api/lib/twitter_api.php @@ -20,6 +20,9 @@ function twitter_api_get_api_object($oauth_token = null, $oauth_token_secret = n } $api = new TwitterOAuth($consumer_key, $consumer_secret, $oauth_token, $oauth_token_secret); + if ($api) { + $api->host = "https://api.twitter.com/1.1/"; + } return $api; } -- cgit v1.2.3 From 9c3d423d87fe5f287a48158df105364b6eb36de0 Mon Sep 17 00:00:00 2001 From: cash Date: Sun, 14 Apr 2013 21:06:36 -0400 Subject: removes oauth plugin as twitter_api does not depend on it anymore --- mod/oauth_api/manifest.xml | 25 - mod/oauth_api/start.php | 24 - mod/oauth_api/vendors/oauth/LICENSE | 21 - mod/oauth_api/vendors/oauth/example/server/INSTALL | 53 - .../vendors/oauth/example/server/core/init.php | 127 -- .../example/server/core/templates/inc/footer.tpl | 2 - .../example/server/core/templates/inc/header.tpl | 2 - .../oauth/example/server/core/templates/index.tpl | 13 - .../oauth/example/server/core/templates/logon.tpl | 21 - .../example/server/core/templates/register.tpl | 41 - .../vendors/oauth/example/server/www/hello.php | 65 - .../vendors/oauth/example/server/www/index.php | 37 - .../vendors/oauth/example/server/www/logon.php | 55 - 
.../vendors/oauth/example/server/www/oauth.php | 77 - .../vendors/oauth/example/server/www/register.php | 28 - .../oauth/example/server/www/services.xrds.php | 71 - .../vendors/oauth/library/OAuthDiscovery.php | 226 --- .../vendors/oauth/library/OAuthException.php | 50 - .../vendors/oauth/library/OAuthRequest.php | 801 --------- .../vendors/oauth/library/OAuthRequestLogger.php | 274 --- .../vendors/oauth/library/OAuthRequestSigner.php | 209 --- .../vendors/oauth/library/OAuthRequestVerifier.php | 262 --- .../vendors/oauth/library/OAuthRequester.php | 508 ------ .../vendors/oauth/library/OAuthServer.php | 232 --- mod/oauth_api/vendors/oauth/library/OAuthStore.php | 86 - .../library/body/OAuthBodyContentDisposition.php | 129 -- .../library/body/OAuthBodyMultipartFormdata.php | 143 -- .../vendors/oauth/library/discovery/xrds_parse.php | 304 ---- .../vendors/oauth/library/discovery/xrds_parse.txt | 101 -- .../OAuthSignatureMethod.class.php | 69 - .../OAuthSignatureMethod_HMAC_SHA1.php | 115 -- .../signature_method/OAuthSignatureMethod_MD5.php | 95 - .../OAuthSignatureMethod_PLAINTEXT.php | 80 - .../OAuthSignatureMethod_RSA_SHA1.php | 136 -- .../library/store/OAuthStoreAbstract.class.php | 149 -- .../oauth/library/store/OAuthStoreAnyMeta.php | 265 --- .../oauth/library/store/OAuthStoreMySQL.php | 1879 -------------------- .../vendors/oauth/library/store/mysql/install.php | 32 - .../vendors/oauth/library/store/mysql/mysql.sql | 219 --- .../oauth/test/discovery/xrds-fireeagle.xrds | 78 - .../oauth/test/discovery/xrds-getsatisfaction.xrds | 73 - .../oauth/test/discovery/xrds-magnolia.xrds | 81 - mod/oauth_api/vendors/oauth/test/oauth_test.php | 188 -- 43 files changed, 7446 deletions(-) delete mode 100644 mod/oauth_api/manifest.xml delete mode 100644 mod/oauth_api/start.php delete mode 100644 mod/oauth_api/vendors/oauth/LICENSE delete mode 100644 mod/oauth_api/vendors/oauth/example/server/INSTALL delete mode 100644 mod/oauth_api/vendors/oauth/example/server/core/init.php 
delete mode 100644 mod/oauth_api/vendors/oauth/example/server/core/templates/inc/footer.tpl delete mode 100644 mod/oauth_api/vendors/oauth/example/server/core/templates/inc/header.tpl delete mode 100644 mod/oauth_api/vendors/oauth/example/server/core/templates/index.tpl delete mode 100644 mod/oauth_api/vendors/oauth/example/server/core/templates/logon.tpl delete mode 100644 mod/oauth_api/vendors/oauth/example/server/core/templates/register.tpl delete mode 100644 mod/oauth_api/vendors/oauth/example/server/www/hello.php delete mode 100644 mod/oauth_api/vendors/oauth/example/server/www/index.php delete mode 100644 mod/oauth_api/vendors/oauth/example/server/www/logon.php delete mode 100644 mod/oauth_api/vendors/oauth/example/server/www/oauth.php delete mode 100644 mod/oauth_api/vendors/oauth/example/server/www/register.php delete mode 100644 mod/oauth_api/vendors/oauth/example/server/www/services.xrds.php delete mode 100644 mod/oauth_api/vendors/oauth/library/OAuthDiscovery.php delete mode 100644 mod/oauth_api/vendors/oauth/library/OAuthException.php delete mode 100644 mod/oauth_api/vendors/oauth/library/OAuthRequest.php delete mode 100644 mod/oauth_api/vendors/oauth/library/OAuthRequestLogger.php delete mode 100644 mod/oauth_api/vendors/oauth/library/OAuthRequestSigner.php delete mode 100644 mod/oauth_api/vendors/oauth/library/OAuthRequestVerifier.php delete mode 100644 mod/oauth_api/vendors/oauth/library/OAuthRequester.php delete mode 100644 mod/oauth_api/vendors/oauth/library/OAuthServer.php delete mode 100644 mod/oauth_api/vendors/oauth/library/OAuthStore.php delete mode 100644 mod/oauth_api/vendors/oauth/library/body/OAuthBodyContentDisposition.php delete mode 100644 mod/oauth_api/vendors/oauth/library/body/OAuthBodyMultipartFormdata.php delete mode 100644 mod/oauth_api/vendors/oauth/library/discovery/xrds_parse.php delete mode 100644 mod/oauth_api/vendors/oauth/library/discovery/xrds_parse.txt delete mode 100644 
mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod.class.php delete mode 100644 mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_HMAC_SHA1.php delete mode 100644 mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_MD5.php delete mode 100644 mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_PLAINTEXT.php delete mode 100644 mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_RSA_SHA1.php delete mode 100644 mod/oauth_api/vendors/oauth/library/store/OAuthStoreAbstract.class.php delete mode 100644 mod/oauth_api/vendors/oauth/library/store/OAuthStoreAnyMeta.php delete mode 100644 mod/oauth_api/vendors/oauth/library/store/OAuthStoreMySQL.php delete mode 100644 mod/oauth_api/vendors/oauth/library/store/mysql/install.php delete mode 100644 mod/oauth_api/vendors/oauth/library/store/mysql/mysql.sql delete mode 100644 mod/oauth_api/vendors/oauth/test/discovery/xrds-fireeagle.xrds delete mode 100644 mod/oauth_api/vendors/oauth/test/discovery/xrds-getsatisfaction.xrds delete mode 100644 mod/oauth_api/vendors/oauth/test/discovery/xrds-magnolia.xrds delete mode 100644 mod/oauth_api/vendors/oauth/test/oauth_test.php diff --git a/mod/oauth_api/manifest.xml b/mod/oauth_api/manifest.xml deleted file mode 100644 index 991be6a22..000000000 --- a/mod/oauth_api/manifest.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - OAuth API - Core developers - 1.8 - Provides OAuth libraries and API support. 
- bundled - api - http://www.elgg.org/ - See COPYRIGHT.txt - GNU General Public License version 2 - - elgg_release - 1.8 - - - - plugin - oauth_lib - - - php_extension - oauth - - diff --git a/mod/oauth_api/start.php b/mod/oauth_api/start.php deleted file mode 100644 index d087a13d1..000000000 --- a/mod/oauth_api/start.php +++ /dev/null @@ -1,24 +0,0 @@ - - ServerAdmin admin@localhost - ServerName hello.local - DocumentRoot /home/john/src/oauth-php/example/server/www - - UseCanonicalName Off - ServerSignature On - - SetEnv DB_DSN mysql://foo:bar@localhost/oauth_example_server_db - - - Options Indexes FollowSymLinks MultiViews - AllowOverride None - Order allow,deny - Allow from all - - - php_value magic_quotes_gpc 0 - php_value register_globals 0 - php_value session.auto_start 0 - - - - - - -2) Create the database structure for the server: - -# mysql -u foo -p bar -h localhost < /home/john/src/oauth-php/library/store/mysql/mysql.sql - - - -3) Download and install smarty into the smarty/core/smarty directory: - -# cd /home/john/src/oauth-php/example/server/core -# wget 'http://www.smarty.net/do_download.php?download_file=Smarty-2.6.19.tar.gz' -# tar zxf Smarty-2.6.19.tar.gz -# mv Smarty-2.6.19 smarty - - -4) That's it! Point your browser to - - http://hello.local/ - -To get started. 
- -Arjan Scherpenisse , July 2008 diff --git a/mod/oauth_api/vendors/oauth/example/server/core/init.php b/mod/oauth_api/vendors/oauth/example/server/core/init.php deleted file mode 100644 index e5bb9de35..000000000 --- a/mod/oauth_api/vendors/oauth/example/server/core/init.php +++ /dev/null @@ -1,127 +0,0 @@ - - * - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - - -/* - * Simple 'user management' - */ -define ('USERNAME', 'sysadmin'); -define ('PASSWORD', 'sysadmin'); - - -/* - * Always announce XRDS OAuth discovery - */ -header('X-XRDS-Location: http://' . $_SERVER['SERVER_NAME'] . 
'/services.xrds'); - - -/* - * Initialize the database connection - */ -$info = parse_url(getenv('DB_DSN')); -($GLOBALS['db_conn'] = mysql_connect($info['host'], $info['user'], $info['pass'])) || die(mysql_error()); -mysql_select_db(basename($info['path']), $GLOBALS['db_conn']) || die(mysql_error()); -unset($info); - - -require_once '../../../library/OAuthServer.php'; - -/* - * Initialize OAuth store - */ -require_once '../../../library/OAuthStore.php'; -OAuthStore::instance('MySQL', array('conn' => $GLOBALS['db_conn'])); - - -/* - * Session - */ -session_start(); - - -/* - * Template handling - */ -require_once 'smarty/libs/Smarty.class.php'; -function session_smarty() -{ - if (!isset($GLOBALS['smarty'])) - { - $GLOBALS['smarty'] = new Smarty; - $GLOBALS['smarty']->template_dir = dirname(__FILE__) . '/templates/'; - $GLOBALS['smarty']->compile_dir = dirname(__FILE__) . '/../cache/templates_c'; - } - - return $GLOBALS['smarty']; -} - -function assert_logged_in() -{ - if (empty($_SESSION['authorized'])) - { - $uri = $_SERVER['REQUEST_URI']; - header('Location: /logon?goto=' . 
urlencode($uri)); - } -} - -function assert_request_vars() -{ - foreach(func_get_args() as $a) - { - if (!isset($_REQUEST[$a])) - { - header('HTTP/1.1 400 Bad Request'); - echo 'Bad request.'; - exit; - } - } -} - -function assert_request_vars_all() -{ - foreach($_REQUEST as $row) - { - foreach(func_get_args() as $a) - { - if (!isset($row[$a])) - { - header('HTTP/1.1 400 Bad Request'); - echo 'Bad request.'; - exit; - } - } - } -} - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/example/server/core/templates/inc/footer.tpl b/mod/oauth_api/vendors/oauth/example/server/core/templates/inc/footer.tpl deleted file mode 100644 index 308b1d01b..000000000 --- a/mod/oauth_api/vendors/oauth/example/server/core/templates/inc/footer.tpl +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/mod/oauth_api/vendors/oauth/example/server/core/templates/inc/header.tpl b/mod/oauth_api/vendors/oauth/example/server/core/templates/inc/header.tpl deleted file mode 100644 index 5046f54b0..000000000 --- a/mod/oauth_api/vendors/oauth/example/server/core/templates/inc/header.tpl +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/mod/oauth_api/vendors/oauth/example/server/core/templates/index.tpl b/mod/oauth_api/vendors/oauth/example/server/core/templates/index.tpl deleted file mode 100644 index 7b065537d..000000000 --- a/mod/oauth_api/vendors/oauth/example/server/core/templates/index.tpl +++ /dev/null @@ -1,13 +0,0 @@ -{include file='inc/header.tpl'} - -

    OAuth server

    -Go to: - - - -Afterwards, make an OAuth test request to http://{$smarty.server.name}/hello to test your connection.

    - -{include file='inc/footer.tpl'} diff --git a/mod/oauth_api/vendors/oauth/example/server/core/templates/logon.tpl b/mod/oauth_api/vendors/oauth/example/server/core/templates/logon.tpl deleted file mode 100644 index 5ccd432b5..000000000 --- a/mod/oauth_api/vendors/oauth/example/server/core/templates/logon.tpl +++ /dev/null @@ -1,21 +0,0 @@ -{include file='inc/header.tpl'} - -

    Login

    - -
    - - -
    - - -

    - -
    - - -

    - - -
    - -{include file='inc/footer.tpl'} diff --git a/mod/oauth_api/vendors/oauth/example/server/core/templates/register.tpl b/mod/oauth_api/vendors/oauth/example/server/core/templates/register.tpl deleted file mode 100644 index 0e28c1584..000000000 --- a/mod/oauth_api/vendors/oauth/example/server/core/templates/register.tpl +++ /dev/null @@ -1,41 +0,0 @@ -{include file='inc/header.tpl'} - -

    Register server

    - -

    Register a server which is gonna act as an identity client.

    - -
    - -
    - About You - -

    -
    - -

    - -

    -
    - -

    -
    - -
    - Location Of Your Application Or Site - -

    -
    - -

    - -

    -
    - -

    -
    - -
    - -
    - -{include file='inc/footer.tpl'} diff --git a/mod/oauth_api/vendors/oauth/example/server/www/hello.php b/mod/oauth_api/vendors/oauth/example/server/www/hello.php deleted file mode 100644 index 8cb94bb1e..000000000 --- a/mod/oauth_api/vendors/oauth/example/server/www/hello.php +++ /dev/null @@ -1,65 +0,0 @@ - - * - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -require_once '../core/init.php'; - -$authorized = false; -$server = new OAuthServer(); -try -{ - if ($server->verifyIfSigned()) - { - $authorized = true; - } -} -catch (OAuthException $e) -{ -} - -if (!$authorized) -{ - header('HTTP/1.1 401 Unauthorized'); - header('Content-Type: text/plain'); - - echo "OAuth Verification Failed: " . $e->getMessage(); - die; -} - -// From here on we are authenticated with OAuth. 
- -header('Content-type: text/plain'); -echo 'Hello, world!'; - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/example/server/www/index.php b/mod/oauth_api/vendors/oauth/example/server/www/index.php deleted file mode 100644 index f5cadbe61..000000000 --- a/mod/oauth_api/vendors/oauth/example/server/www/index.php +++ /dev/null @@ -1,37 +0,0 @@ - - * - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -require '../core/init.php'; - -$smarty = session_smarty(); -$smarty->display('index.tpl'); - -?> diff --git a/mod/oauth_api/vendors/oauth/example/server/www/logon.php b/mod/oauth_api/vendors/oauth/example/server/www/logon.php deleted file mode 100644 index 5c937b719..000000000 --- a/mod/oauth_api/vendors/oauth/example/server/www/logon.php +++ /dev/null @@ -1,55 +0,0 @@ - - * - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -require_once '../core/init.php'; - -if (isset($_POST['username']) && isset($_POST['password'])) -{ - if ($_POST['username'] == USERNAME && $_POST['password'] == PASSWORD) - { - $_SESSION['authorized'] = true; - if (!empty($_REQUEST['goto'])) - { - header('Location: ' . 
$_REQUEST['goto']); - die; - } - - echo "Logon succesfull."; - die; - } -} - -$smarty = session_smarty(); -$smarty->display('logon.tpl'); - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/example/server/www/oauth.php b/mod/oauth_api/vendors/oauth/example/server/www/oauth.php deleted file mode 100644 index e0badcc39..000000000 --- a/mod/oauth_api/vendors/oauth/example/server/www/oauth.php +++ /dev/null @@ -1,77 +0,0 @@ - - * - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -require_once '../core/init.php'; - -$server = new OAuthServer(); - -switch($_SERVER['PATH_INFO']) -{ -case '/request_token': - $server->requestToken(); - exit; - -case '/access_token': - $server->accessToken(); - exit; - -case '/authorize': - # logon - - assert_logged_in(); - - try - { - $server->authorizeVerify(); - $server->authorizeFinish(true, 1); - } - catch (OAuthException $e) - { - header('HTTP/1.1 400 Bad Request'); - header('Content-Type: text/plain'); - - echo "Failed OAuth Request: " . $e->getMessage(); - } - exit; - - -default: - header('HTTP/1.1 500 Internal Server Error'); - header('Content-Type: text/plain'); - echo "Unknown request"; -} - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/example/server/www/register.php b/mod/oauth_api/vendors/oauth/example/server/www/register.php deleted file mode 100644 index c5785c2c8..000000000 --- a/mod/oauth_api/vendors/oauth/example/server/www/register.php +++ /dev/null @@ -1,28 +0,0 @@ -updateConsumer($_POST, 1, true); - - $c = $store->getConsumer($key); - echo 'Your consumer key is: ' . $c['consumer_key'] . '
    '; - echo 'Your consumer secret is: ' . $c['consumer_secret'] . '
    '; - } - catch (OAuthException $e) - { - echo 'Error: ' . $e->getMessage() . '
    '; - } -} - - -$smarty = session_smarty(); -$smarty->display('register.tpl'); - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/example/server/www/services.xrds.php b/mod/oauth_api/vendors/oauth/example/server/www/services.xrds.php deleted file mode 100644 index 4c50aa12b..000000000 --- a/mod/oauth_api/vendors/oauth/example/server/www/services.xrds.php +++ /dev/null @@ -1,71 +0,0 @@ - - * - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -header('Content-Type: application/xrds+xml'); - -$server = $_SERVER['SERVER_NAME']; - -echo '' . 
"\n"; - -?> - - - xri://$xrds*simple - - http://oauth.net/discovery/1.0 - #main - - - http://oauth.net/core/1.0/endpoint/request - http://oauth.net/core/1.0/parameters/auth-header - http://oauth.net/core/1.0/parameters/uri-query - http://oauth.net/core/1.0/signature/HMAC-SHA1 - http://oauth.net/core/1.0/signature/PLAINTEXT - http:///oauth/request_token - - - http://oauth.net/core/1.0/endpoint/authorize - http://oauth.net/core/1.0/parameters/uri-query - http:///oauth/authorize - - - http://oauth.net/core/1.0/endpoint/access - http://oauth.net/core/1.0/parameters/auth-header - http://oauth.net/core/1.0/parameters/uri-query - http://oauth.net/core/1.0/signature/HMAC-SHA1 - http://oauth.net/core/1.0/signature/PLAINTEXT - http:///oauth/access_token - - - diff --git a/mod/oauth_api/vendors/oauth/library/OAuthDiscovery.php b/mod/oauth_api/vendors/oauth/library/OAuthDiscovery.php deleted file mode 100644 index d097756dd..000000000 --- a/mod/oauth_api/vendors/oauth/library/OAuthDiscovery.php +++ /dev/null @@ -1,226 +0,0 @@ - - * @date Sep 4, 2008 5:05:19 PM - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -require_once dirname(__FILE__).'/discovery/xrds_parse.php'; - -require_once dirname(__FILE__).'/OAuthException.php'; -require_once dirname(__FILE__).'/OAuthRequestLogger.php'; - - -class OAuthDiscovery -{ - /** - * Return a description how we can do a consumer allocation. Prefers static allocation if - * possible. If static allocation is possible - * - * See also: http://oauth.net/discovery/#consumer_identity_types - * - * @param string uri - * @return array provider description - */ - static function discover ( $uri ) - { - // See what kind of consumer allocations are available - $xrds_file = self::discoverXRDS($uri); - if (!empty($xrds_file)) - { - $xrds = xrds_parse($xrds_file); - if (empty($xrds)) - { - throw new OAuthException('Could not discover OAuth information for '.$uri); - } - } - else - { - throw new OAuthException('Could not discover XRDS file at '.$uri); - } - - // Fill an OAuthServer record for the uri found - $ps = parse_url($uri); - $host = isset($ps['host']) ? 
$ps['host'] : 'localhost'; - $server_uri = $ps['scheme'].'://'.$host.'/'; - - $p = array( - 'user_id' => null, - 'consumer_key' => '', - 'consumer_secret' => '', - 'signature_methods' => '', - 'server_uri' => $server_uri, - 'request_token_uri' => '', - 'authorize_uri' => '', - 'access_token_uri' => '' - ); - - - // Consumer identity (out of bounds or static) - if (isset($xrds['consumer_identity'])) - { - // Try to find a static consumer allocation, we like those :) - foreach ($xrds['consumer_identity'] as $ci) - { - if ($ci['method'] == 'static' && !empty($ci['consumer_key'])) - { - $p['consumer_key'] = $ci['consumer_key']; - $p['consumer_secret'] = ''; - } - else if ($ci['method'] == 'oob' && !empty($ci['uri'])) - { - // TODO: Keep this uri somewhere for the user? - $p['consumer_oob_uri'] = $ci['uri']; - } - } - } - - // The token uris - if (isset($xrds['request'][0]['uri'])) - { - $p['request_token_uri'] = $xrds['request'][0]['uri']; - if (!empty($xrds['request'][0]['signature_method'])) - { - $p['signature_methods'] = $xrds['request'][0]['signature_method']; - } - } - if (isset($xrds['authorize'][0]['uri'])) - { - $p['authorize_uri'] = $xrds['authorize'][0]['uri']; - if (!empty($xrds['authorize'][0]['signature_method'])) - { - $p['signature_methods'] = $xrds['authorize'][0]['signature_method']; - } - } - if (isset($xrds['access'][0]['uri'])) - { - $p['access_token_uri'] = $xrds['access'][0]['uri']; - if (!empty($xrds['access'][0]['signature_method'])) - { - $p['signature_methods'] = $xrds['access'][0]['signature_method']; - } - } - return $p; - } - - - /** - * Discover the XRDS file at the uri. This is a bit primitive, you should overrule - * this function so that the XRDS file can be cached for later referral. 
- * - * @param string uri - * @return string false when no XRDS file found - */ - static protected function discoverXRDS ( $uri, $recur = 0 ) - { - // Bail out when we are following redirects - if ($recur > 10) - { - return false; - } - - $data = self::curl($uri); - - // Check what we got back, could be: - // 1. The XRDS discovery file itself (check content-type) - // 2. The X-XRDS-Location header - - if (is_string($data) && !empty($data)) - { - list($head,$body) = explode("\r\n\r\n", $data); - $body = trim($body); - $m = false; - - // See if we got the XRDS file itself or we have to follow a location header - if ( preg_match('/^Content-Type:\s*application\/xrds+xml/im', $head) - || preg_match('/^<\?xml[^>]*\?>\s* \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/OAuthException.php b/mod/oauth_api/vendors/oauth/library/OAuthException.php deleted file mode 100644 index cadd1d032..000000000 --- a/mod/oauth_api/vendors/oauth/library/OAuthException.php +++ /dev/null @@ -1,50 +0,0 @@ - - * @date Nov 29, 2007 5:33:54 PM - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -// TODO: something with the HTTP return code matching to the problem - -require_once dirname(__FILE__) . '/OAuthRequestLogger.php'; - -class OAuthException extends Exception -{ - function __construct ( $message ) - { - Exception::__construct($message); - OAuthRequestLogger::addNote('OAuthException: '.$message); - } - -} - - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/OAuthRequest.php b/mod/oauth_api/vendors/oauth/library/OAuthRequest.php deleted file mode 100644 index c0d6ddbc7..000000000 --- a/mod/oauth_api/vendors/oauth/library/OAuthRequest.php +++ /dev/null @@ -1,801 +0,0 @@ - - * @date Nov 16, 2007 12:20:31 PM - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - - -require_once dirname(__FILE__) . '/OAuthException.php'; - -/** - * Object to parse an incoming OAuth request or prepare an outgoing OAuth request - */ -class OAuthRequest -{ - /* the realm for this request */ - protected $realm; - - /* all the parameters, RFC3986 encoded name/value pairs */ - protected $param = array(); - - /* the parsed request uri */ - protected $uri_parts; - - /* the raw request uri */ - protected $uri; - - /* the request headers */ - protected $headers; - - /* the request method */ - protected $method; - - /* the body of the OAuth request */ - protected $body; - - - /** - * Construct from the current request. Useful for checking the signature of a request. - * When not supplied with any parameters this will use the current request. - * - * @param string uri might include parameters - * @param string method GET, PUT, POST etc. 
- * @param string parameters additional post parameters, urlencoded (RFC1738) - * @param array headers headers for request - * @param string body optional body of the OAuth request (POST or PUT) - */ - function __construct ( $uri = null, $method = 'GET', $parameters = '', $headers = array(), $body = null ) - { - if (empty($uri)) - { - if (is_object($_SERVER)) - { - // Tainted arrays - the normal stuff in anyMeta - $method = $_SERVER->REQUEST_METHOD->getRawUnsafe(); - $uri = $_SERVER->REQUEST_URI->getRawUnsafe(); - } - else - { - // non anyMeta systems - $method = $_SERVER['REQUEST_METHOD']; - $uri = $_SERVER['REQUEST_URI']; - } - $headers = getallheaders(); - $parameters = ''; - $this->method = strtoupper($method); - - // If this is a post then also check the posted variables - if (strcasecmp($method, 'POST') == 0) - { - /* - // TODO: what to do with 'multipart/form-data'? - if ($this->getRequestContentType() == 'multipart/form-data') - { - throw new OAuthException('Unsupported POST content type, expected "application/x-www-form-urlencoded" got "'.@$_SERVER['CONTENT_TYPE'].'"'); - } - */ - if ($this->getRequestContentType() == 'application/x-www-form-urlencoded') - { - // Get the posted body (when available) - if (!isset($headers['X-OAuth-Test'])) - { - $parameters .= $this->getRequestBody(); - } - } - else - { - $body = $this->getRequestBody(); - } - } - else if (strcasecmp($method, 'PUT') == 0) - { - $body = $this->getRequestBody(); - } - } - - $this->method = strtoupper($method); - $this->headers = $headers; - // Store the values, prepare for oauth - $this->uri = $uri; - $this->body = $body; - $this->parseUri($parameters); - $this->parseHeaders(); - $this->transcodeParams(); - } - - - /** - * Return the signature base string. - * Note that we can't use rawurlencode due to specified use of RFC3986. 
- * - * @return string - */ - function signatureBaseString () - { - $sig = array(); - $sig[] = $this->method; - $sig[] = $this->getRequestUrl(); - $sig[] = $this->getNormalizedParams(); - - return implode('&', array_map(array($this, 'urlencode'), $sig)); - } - - - /** - * Calculate the signature of the request, using the method in oauth_signature_method. - * The signature is returned encoded in the form as used in the url. So the base64 and - * urlencoding has been done. - * - * @param string consumer_secret - * @param string token_secret - * @exception when not all parts available - * @return string - */ - function calculateSignature ( $consumer_secret, $token_secret, $token_type = 'access' ) - { - $required = array( - 'oauth_consumer_key', - 'oauth_signature_method', - 'oauth_timestamp', - 'oauth_nonce' - ); - - if ($token_type !== false) - { - $required[] = 'oauth_token'; - } - - foreach ($required as $req) - { - if (!isset($this->param[$req])) - { - throw new OAuthException('Can\'t sign request, missing parameter "'.$req.'"'); - } - } - - $this->checks(); - - $base = $this->signatureBaseString(); - $signature = $this->calculateDataSignature($base, $consumer_secret, $token_secret, $this->param['oauth_signature_method']); - return $signature; - } - - - /** - * Calculate the signature of a string. - * Uses the signature method from the current parameters. - * - * @param string data - * @param string consumer_secret - * @param string token_secret - * @param string signature_method - * @exception OAuthException thrown when the signature method is unknown - * @return string signature - */ - function calculateDataSignature ( $data, $consumer_secret, $token_secret, $signature_method ) - { - if (is_null($data)) - { - $data = ''; - } - - $sig = $this->getSignatureMethod($signature_method); - return $sig->signature($this, $data, $consumer_secret, $token_secret); - } - - - /** - * Select a signature method from the list of available methods. 
- * We try to check the most secure methods first. - * - * @todo Let the signature method tell us how secure it is - * @param array methods - * @exception OAuthException when we don't support any method in the list - * @return string - */ - public function selectSignatureMethod ( $methods ) - { - if (in_array('HMAC-SHA1', $methods)) - { - $method = 'HMAC-SHA1'; - } - else if (in_array('MD5', $methods)) - { - $method = 'MD5'; - } - else - { - $method = false; - foreach ($methods as $m) - { - $m = strtoupper($m); - $m = preg_replace('/[^A-Z0-9]/', '_', $m); - if (file_exists(dirname(__FILE__).'/signature_method/OAuthSignatureMethod_'.$m.'.php')) - { - $method = $m; - break; - } - } - - if (empty($method)) - { - throw new OAuthException('None of the signing methods is supported.'); - } - } - return $method; - } - - - /** - * Fetch the signature object used for calculating and checking the signature base string - * - * @param string method - * @return OAuthSignatureMethod object - */ - function getSignatureMethod ( $method ) - { - $m = strtoupper($method); - $m = preg_replace('/[^A-Z0-9]/', '_', $m); - $class = 'OAuthSignatureMethod_'.$m; - - if (file_exists(dirname(__FILE__).'/signature_method/'.$class.'.php')) - { - require_once dirname(__FILE__).'/signature_method/'.$class.'.php'; - $sig = new $class(); - } - else - { - throw new OAuthException('Unsupported signature method "'.$m.'".'); - } - return $sig; - } - - - /** - * Perform some sanity checks. 
- * - * @exception OAuthException thrown when sanity checks failed - */ - function checks () - { - if (isset($this->param['oauth_version'])) - { - $version = $this->urldecode($this->param['oauth_version']); - if ($version != '1.0') - { - throw new OAuthException('Expected OAuth version 1.0, got "'.$this->param['oauth_version'].'"'); - } - } - } - - - /** - * Return the request method - * - * @return string - */ - function getMethod () - { - return $this->method; - } - - /** - * Return the complete parameter string for the signature check. - * All parameters are correctly urlencoded and sorted on name and value - * - * @return string - */ - function getNormalizedParams () - { - /* - // sort by name, then by value - // (needed when we start allowing multiple values with the same name) - $keys = array_keys($this->param); - $values = array_values($this->param); - array_multisort($keys, SORT_ASC, $values, SORT_ASC); - */ - $params = $this->param; - $normalized = array(); - - ksort($params); - foreach ($params as $key => $value) - { - // all names and values are already urlencoded, exclude the oauth signature - if ($key != 'oauth_signature') - { - if (is_array($value)) - { - $value_sort = $value; - sort($value_sort); - foreach ($value_sort as $v) - { - $normalized[] = $key.'='.$v; - } - } - else - { - $normalized[] = $key.'='.$value; - } - } - } - return implode('&', $normalized); - } - - - /** - * Return the normalised url for signature checks - */ - function getRequestUrl () - { - $url = $this->uri_parts['scheme'] . '://' - . $this->uri_parts['user'] . (!empty($this->uri_parts['pass']) ? ':' : '') - . $this->uri_parts['pass'] . (!empty($this->uri_parts['user']) ? '@' : '') - . 
$this->uri_parts['host']; - - if ( $this->uri_parts['port'] - && $this->uri_parts['port'] != $this->defaultPortForScheme($this->uri_parts['scheme'])) - { - $url .= ':'.$this->uri_parts['port']; - } - if (!empty($this->uri_parts['path'])) - { - $url .= $this->uri_parts['path']; - } - return $url; - } - - - /** - * Get a parameter, value is always urlencoded - * - * @param string name - * @param boolean urldecode set to true to decode the value upon return - * @return string value false when not found - */ - function getParam ( $name, $urldecode = false ) - { - if (isset($this->param[$name])) - { - $s = $this->param[$name]; - } - else if (isset($this->param[$this->urlencode($name)])) - { - $s = $this->param[$this->urlencode($name)]; - } - else - { - $s = false; - } - if (!empty($s) && $urldecode) - { - if (is_array($s)) - { - $s = array_map(array($this,'urldecode'), $s); - } - else - { - $s = $this->urldecode($s); - } - } - return $s; - } - - /** - * Set a parameter - * - * @param string name - * @param string value - * @param boolean encoded set to true when the values are already encoded - */ - function setParam ( $name, $value, $encoded = false ) - { - if (!$encoded) - { - $name_encoded = $this->urlencode($name); - if (is_array($value)) - { - foreach ($value as $v) - { - $this->param[$name_encoded][] = $this->urlencode($v); - } - } - else - { - $this->param[$name_encoded] = $this->urlencode($value); - } - } - else - { - $this->param[$name] = $value; - } - } - - - /** - * Re-encode all parameters so that they are encoded using RFC3986. - * Updates the $this->param attribute. 
- */ - protected function transcodeParams () - { - $params = $this->param; - $this->param = array(); - - foreach ($params as $name=>$value) - { - if (is_array($value)) - { - $this->param[$this->urltranscode($name)] = array_map(array($this,'urltranscode'), $value); - } - else - { - $this->param[$this->urltranscode($name)] = $this->urltranscode($value); - } - } - } - - - - /** - * Return the body of the OAuth request. - * - * @return string null when no body - */ - function getBody () - { - return $this->body; - } - - - /** - * Return the body of the OAuth request. - * - * @return string null when no body - */ - function setBody ( $body ) - { - $this->body = $body; - } - - - /** - * Parse the uri into its parts. Fill in the missing parts. - * - * @todo check for the use of https, right now we default to http - * @todo support for multiple occurences of parameters - * @param string $parameters optional extra parameters (from eg the http post) - */ - protected function parseUri ( $parameters ) - { - $ps = parse_url($this->uri); - - // Get the current/requested method - if (empty($ps['scheme'])) - { - $ps['scheme'] = 'http'; - } - else - { - $ps['scheme'] = strtolower($ps['scheme']); - } - - // Get the current/requested host - if (empty($ps['host'])) - { - if (isset($_SERVER['HTTP_HOST'])) - { - $ps['host'] = $_SERVER['HTTP_HOST']; - } - else - { - $ps['host'] = ''; - } - } - $ps['host'] = mb_strtolower($ps['host']); - if (!preg_match('/^[a-z0-9\.\-]+$/', $ps['host'])) - { - throw new OAuthException('Unsupported characters in host name'); - } - - // Get the port we are talking on - if (empty($ps['port'])) - { - $ps['port'] = $this->defaultPortForScheme($ps['scheme']); - } - - if (empty($ps['user'])) - { - $ps['user'] = ''; - } - if (empty($ps['pass'])) - { - $ps['pass'] = ''; - } - if (empty($ps['path'])) - { - $ps['path'] = '/'; - } - if (empty($ps['query'])) - { - $ps['query'] = ''; - } - if (empty($ps['fragment'])) - { - $ps['fragment'] = ''; - } - - // Now all is 
complete - parse all parameters - foreach (array($ps['query'], $parameters) as $params) - { - if (strlen($params) > 0) - { - $params = explode('&', $params); - foreach ($params as $p) - { - @list($name, $value) = explode('=', $p, 2); - $this->param[$name] = $value; - } - } - } - $this->uri_parts = $ps; - } - - - /** - * Return the default port for a scheme - * - * @param string scheme - * @return int - */ - protected function defaultPortForScheme ( $scheme ) - { - switch ($scheme) - { - case 'http': return 80; - case 'https': return 43; - default: - throw new OAuthException('Unsupported scheme type, expected http or https, got "'.$scheme.'"'); - break; - } - } - - - /** - * Encode a string according to the RFC3986 - * - * @param string s - * @return string - */ - function urlencode ( $s ) - { - if ($s === false) - { - return $s; - } - else - { - return str_replace('%7E', '~', rawurlencode($s)); - } - } - - /** - * Decode a string according to RFC3986. - * Also correctly decodes RFC1738 urls. - * - * @param string s - * @return string - */ - function urldecode ( $s ) - { - if ($s === false) - { - return $s; - } - else - { - return rawurldecode($s); - } - } - - /** - * urltranscode - make sure that a value is encoded using RFC3986. - * We use a basic urldecode() function so that any use of '+' as the - * encoding of the space character is correctly handled. 
- * - * @param string s - * @return string - */ - function urltranscode ( $s ) - { - if ($s === false) - { - return $s; - } - else - { - return $this->urlencode(urldecode($s)); - } - } - - - /** - * Parse the oauth parameters from the request headers - * Looks for something like: - * - * Authorization: OAuth realm="http://photos.example.net/authorize", - * oauth_consumer_key="dpf43f3p2l4k3l03", - * oauth_token="nnch734d00sl2jdk", - * oauth_signature_method="HMAC-SHA1", - * oauth_signature="tR3%2BTy81lMeYAr%2FFid0kMTYa%2FWM%3D", - * oauth_timestamp="1191242096", - * oauth_nonce="kllo9940pd9333jh", - * oauth_version="1.0" - */ - private function parseHeaders () - { -/* - $this->headers['Authorization'] = 'OAuth realm="http://photos.example.net/authorize", - oauth_consumer_key="dpf43f3p2l4k3l03", - oauth_token="nnch734d00sl2jdk", - oauth_signature_method="HMAC-SHA1", - oauth_signature="tR3%2BTy81lMeYAr%2FFid0kMTYa%2FWM%3D", - oauth_timestamp="1191242096", - oauth_nonce="kllo9940pd9333jh", - oauth_version="1.0"'; -*/ - if (isset($this->headers['Authorization'])) - { - $auth = trim($this->headers['Authorization']); - if (strncasecmp($auth, 'OAuth', 4) == 0) - { - $vs = explode(',', substr($auth, 6)); - foreach ($vs as $v) - { - if (strpos($v, '=')) - { - $v = trim($v); - list($name,$value) = explode('=', $v, 2); - if (!empty($value) && $value{0} == '"' && substr($value, -1) == '"') - { - $value = substr(substr($value, 1), 0, -1); - } - - if (strcasecmp($name, 'realm') == 0) - { - $this->realm = $value; - } - else - { - $this->param[$name] = $value; - } - } - } - } - } - } - - - /** - * Fetch the content type of the current request - * - * @return string - */ - private function getRequestContentType () - { - $content_type = 'application/octet-stream'; - if (!empty($_SERVER) && array_key_exists('CONTENT_TYPE', $_SERVER)) - { - list($content_type) = explode(';', $_SERVER['CONTENT_TYPE']); - } - return trim($content_type); - } - - - /** - * Get the body of a POST or PUT. 
- * - * Used for fetching the post parameters and to calculate the body signature. - * - * @return string null when no body present (or wrong content type for body) - */ - private function getRequestBody () - { - $body = null; - if ($this->method == 'POST' || $this->method == 'PUT') - { - $body = ''; - $fh = @fopen('php://input', 'r'); - if ($fh) - { - while (!feof($fh)) - { - $s = fread($fh, 1024); - if (is_string($s)) - { - $body .= $s; - } - } - fclose($fh); - } - } - return $body; - } - - - /** - * Simple function to perform a redirect (GET). - * Redirects the User-Agent, does not return. - * - * @param string uri - * @param array params parameters, urlencoded - * @exception OAuthException when redirect uri is illegal - */ - public function redirect ( $uri, $params ) - { - if (!empty($params)) - { - $q = array(); - foreach ($params as $name=>$value) - { - $q[] = $name.'='.$value; - } - $q_s = implode('&', $q); - - if (strpos($uri, '?')) - { - $uri .= '&'.$q_s; - } - else - { - $uri .= '?'.$q_s; - } - } - - // simple security - multiline location headers can inject all kinds of extras - $uri = preg_replace('/\s/', '%20', $uri); - if (strncasecmp($uri, 'http://', 7) && strncasecmp($uri, 'https://', 8)) - { - if (strpos($uri, '://')) - { - throw new OAuthException('Illegal protocol in redirect uri '.$uri); - } - $uri = 'http://'.$uri; - } - - header('HTTP/1.1 302 Found'); - header('Location: '.$uri); - echo ''; - exit(); - } - -} - - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/OAuthRequestLogger.php b/mod/oauth_api/vendors/oauth/library/OAuthRequestLogger.php deleted file mode 100644 index 934c1c53c..000000000 --- a/mod/oauth_api/vendors/oauth/library/OAuthRequestLogger.php +++ /dev/null @@ -1,274 +0,0 @@ - - * @date Dec 7, 2007 12:22:43 PM - * - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person 
obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -class OAuthRequestLogger -{ - static private $logging = 0; - static private $enable_logging = null; - static private $store_log = null; - static private $note = ''; - static private $user_id = null; - static private $request_object = null; - static private $sent = null; - static private $received = null; - static private $log = array(); - - /** - * Start any logging, checks the system configuration if logging is needed. 
- * - * @param OAuthRequest $request_object - */ - static function start ( $request_object = null ) - { - if (defined('OAUTH_LOG_REQUEST')) - { - if (is_null(OAuthRequestLogger::$enable_logging)) - { - OAuthRequestLogger::$enable_logging = true; - } - if (is_null(OAuthRequestLogger::$store_log)) - { - OAuthRequestLogger::$store_log = true; - } - } - - if (OAuthRequestLogger::$enable_logging && !OAuthRequestLogger::$logging) - { - OAuthRequestLogger::$logging = true; - OAuthRequestLogger::$request_object = $request_object; - ob_start(); - - // Make sure we flush our log entry when we stop the request (eg on an exception) - register_shutdown_function(array('OAuthRequestLogger','flush')); - } - } - - - /** - * Force logging, needed for performing test connects independent from the debugging setting. - * - * @param boolean store_log (optional) true to store the log in the db - */ - static function enableLogging ( $store_log = null ) - { - OAuthRequestLogger::$enable_logging = true; - if (!is_null($store_log)) - { - OAuthRequestLogger::$store_log = $store_log; - } - } - - - /** - * Logs the request to the database, sends any cached output. - * Also called on shutdown, to make sure we always log the request being handled. - */ - static function flush () - { - if (OAuthRequestLogger::$logging) - { - OAuthRequestLogger::$logging = false; - - if (is_null(OAuthRequestLogger::$sent)) - { - // What has been sent to the user-agent? - $data = ob_get_contents(); - if (strlen($data) > 0) - { - ob_end_flush(); - } - elseif (ob_get_level()) - { - ob_end_clean(); - } - $hs = headers_list(); - $sent = implode("\n", $hs) . "\n\n" . 
$data; - } - else - { - // The request we sent - $sent = OAuthRequestLogger::$sent; - } - - if (is_null(OAuthRequestLogger::$received)) - { - // Build the request we received - $hs0 = getallheaders(); - $hs = array(); - foreach ($hs0 as $h => $v) - { - $hs[] = "$h: $v"; - } - - $data = ''; - $fh = @fopen('php://input', 'r'); - if ($fh) - { - while (!feof($fh)) - { - $s = fread($fh, 1024); - if (is_string($s)) - { - $data .= $s; - } - } - fclose($fh); - } - $received = implode("\n", $hs) . "\n\n" . $data; - } - else - { - // The answer we received - $received = OAuthRequestLogger::$received; - } - - // The request base string - if (OAuthRequestLogger::$request_object) - { - $base_string = OAuthRequestLogger::$request_object->signatureBaseString(); - } - else - { - $base_string = ''; - } - - // Figure out to what keys we want to log this request - $keys = array(); - if (OAuthRequestLogger::$request_object) - { - $consumer_key = OAuthRequestLogger::$request_object->getParam('oauth_consumer_key', true); - $token = OAuthRequestLogger::$request_object->getParam('oauth_token', true); - - switch (get_class(OAuthRequestLogger::$request_object)) - { - // tokens are access/request tokens by a consumer - case 'OAuthServer': - case 'OAuthRequestVerifier': - $keys['ocr_consumer_key'] = $consumer_key; - $keys['oct_token'] = $token; - break; - - // tokens are access/request tokens to a server - case 'OAuthRequester': - case 'OAuthRequestSigner': - $keys['osr_consumer_key'] = $consumer_key; - $keys['ost_token'] = $token; - break; - } - } - - // Log the request - if (OAuthRequestLogger::$store_log) - { - $store = OAuthStore::instance(); - $store->addLog($keys, $received, $sent, $base_string, OAuthRequestLogger::$note, OAuthRequestLogger::$user_id); - } - - OAuthRequestLogger::$log[] = array( - 'keys' => $keys, - 'received' => $received, - 'sent' => $sent, - 'base_string' => $base_string, - 'note' => OAuthRequestLogger::$note - ); - } - } - - - /** - * Add a note, used by the 
OAuthException to log all exceptions. - * - * @param string note - */ - static function addNote ( $note ) - { - OAuthRequestLogger::$note .= $note . "\n\n"; - } - - /** - * Set the OAuth request object being used - * - * @param OAuthRequest request_object - */ - static function setRequestObject ( $request_object ) - { - OAuthRequestLogger::$request_object = $request_object; - } - - - /** - * Set the relevant user (defaults to the current user) - * - * @param int user_id - */ - static function setUser ( $user_id ) - { - OAuthRequestLogger::$user_id = $user_id; - } - - - /** - * Set the request we sent - * - * @param string request - */ - static function setSent ( $request ) - { - OAuthRequestLogger::$sent = $request; - } - - /** - * Set the reply we received - * - * @param string request - */ - static function setReceived ( $reply ) - { - OAuthRequestLogger::$received = $reply; - } - - - /** - * Get the the log till now - * - * @return array - */ - static function getLog () - { - return OAuthRequestLogger::$log; - } -} - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/OAuthRequestSigner.php b/mod/oauth_api/vendors/oauth/library/OAuthRequestSigner.php deleted file mode 100644 index 9f83f287f..000000000 --- a/mod/oauth_api/vendors/oauth/library/OAuthRequestSigner.php +++ /dev/null @@ -1,209 +0,0 @@ - - * @date Nov 16, 2007 4:02:49 PM - * - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright 
notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - - -require_once dirname(__FILE__) . '/OAuthStore.php'; -require_once dirname(__FILE__) . '/OAuthRequest.php'; - - -class OAuthRequestSigner extends OAuthRequest -{ - protected $request; - protected $store; - protected $usr_id = 0; - private $signed = false; - - - /** - * Construct the request to be signed. Parses or appends the parameters in the params url. - * When you supply an params array, then the params should not be urlencoded. - * When you supply a string, then it is assumed it is of the type application/x-www-form-urlencoded - * - * @param string request url - * @param string method PUT, GET, POST etc. 
- * @param mixed params string (for urlencoded data, or array with name/value pairs) - * @param string body optional body for PUT and/or POST requests - */ - function __construct ( $request, $method = 'GET', $params = null, $body = null ) - { - $this->store = OAuthStore::instance(); - - if (is_string($params)) - { - parent::__construct($request, $method, $params); - } - else - { - parent::__construct($request, $method); - if (is_array($params)) - { - foreach ($params as $name => $value) - { - $this->setParam($name, $value); - } - } - } - - // With put/ post we might have a body (not for application/x-www-form-urlencoded requests) - if ($method == 'PUT' || $method == 'POST') - { - $this->setBody($body); - } - } - - - /** - * Reset the 'signed' flag, so that any changes in the parameters force a recalculation - * of the signature. - */ - function setUnsigned () - { - $this->signed = false; - } - - - /** - * Sign our message in the way the server understands. - * Set the needed oauth_xxxx parameters. - * - * @param int usr_id (optional) user that wants to sign this request - * @param array secrets secrets used for signing, when empty then secrets will be fetched from the token registry - * @param string name name of the token to be used for signing - * @exception OAuthException when there is no oauth relation with the server - * @exception OAuthException when we don't support the signing methods of the server - */ - function sign ( $usr_id = 0, $secrets = null, $name = '' ) - { - $url = $this->getRequestUrl(); - if (empty($secrets)) - { - // get the access tokens for the site (on an user by user basis) - $secrets = $this->store->getSecretsForSignature($url, $usr_id, $name); - } - if (empty($secrets)) - { - throw new OAuthException('No OAuth relation with the server for at "'.$url.'"'); - } - - $signature_method = $this->selectSignatureMethod($secrets['signature_methods']); - - $token = isset($secrets['token']) ? 
$secrets['token'] : ''; - $token_secret = isset($secrets['token_secret']) ? $secrets['token_secret'] : ''; - - $this->setParam('oauth_signature_method',$signature_method); - $this->setParam('oauth_signature', ''); - $this->setParam('oauth_nonce', !empty($secrets['nonce']) ? $secrets['nonce'] : uniqid('')); - $this->setParam('oauth_timestamp', !empty($secrets['timestamp']) ? $secrets['timestamp'] : time()); - $this->setParam('oauth_token', $token); - $this->setParam('oauth_consumer_key', $secrets['consumer_key']); - $this->setParam('oauth_version', '1.0'); - - $body = $this->getBody(); - if (!is_null($body)) - { - // We also need to sign the body, use the default signature method - $body_signature = $this->calculateDataSignature($body, $secrets['consumer_secret'], $token_secret, $signature_method); - $this->setParam('xoauth_body_signature', $body_signature, true); - } - - $signature = $this->calculateSignature($secrets['consumer_secret'], $token_secret); - $this->setParam('oauth_signature', $signature, true); - - $this->signed = true; - $this->usr_id = $usr_id; - } - - - /** - * Builds the Authorization header for the request. - * Adds all oauth_ and xoauth_ parameters to the Authorization header. - * - * @return string - */ - function getAuthorizationHeader () - { - if (!$this->signed) - { - $this->sign($this->usr_id); - } - $h = array(); - $h[] = 'Authorization: OAuth realm=""'; - foreach ($this->param as $name => $value) - { - if (strncmp($name, 'oauth_', 6) == 0 || strncmp($name, 'xoauth_', 7) == 0) - { - $h[] = $name.'="'.$value.'"'; - } - } - $hs = implode(', ', $h); - return $hs; - } - - - /** - * Builds the application/x-www-form-urlencoded parameter string. Can be appended as - * the query part to a GET or inside the request body for a POST. 
- * - * @param boolean oauth_as_header (optional) set to false to include oauth parameters - * @return string - */ - function getQueryString ( $oauth_as_header = true ) - { - $parms = array(); - foreach ($this->param as $name => $value) - { - if ( !$oauth_as_header - || (strncmp($name, 'oauth_', 6) != 0 && strncmp($name, 'xoauth_', 7) != 0)) - { - if (is_array($value)) - { - foreach ($value as $v) - { - $parms[] = $name.'='.$v; - } - } - else - { - $parms[] = $name.'='.$value; - } - } - } - return implode('&', $parms); - } - -} - - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/OAuthRequestVerifier.php b/mod/oauth_api/vendors/oauth/library/OAuthRequestVerifier.php deleted file mode 100644 index 4b4db9685..000000000 --- a/mod/oauth_api/vendors/oauth/library/OAuthRequestVerifier.php +++ /dev/null @@ -1,262 +0,0 @@ - - * @date Nov 16, 2007 4:35:03 PM - * - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -require_once dirname(__FILE__) . '/OAuthStore.php'; -require_once dirname(__FILE__) . '/OAuthRequest.php'; - - -class OAuthRequestVerifier extends OAuthRequest -{ - private $request; - private $store; - - /** - * Construct the request to be verified - * - * @param string request - * @param string method - */ - function __construct ( $uri = null, $method = 'GET' ) - { - $this->store = OAuthStore::instance(); - parent::__construct($uri, $method); - - OAuthRequestLogger::start($this); - } - - - /** - * See if the current request is signed with OAuth - * - * @return boolean - */ - static public function requestIsSigned () - { - if (isset($_REQUEST['oauth_signature'])) - { - $signed = true; - } - else - { - $hs = getallheaders(); - if (isset($hs['Authorization']) && strpos($hs['Authorization'], 'oauth_signature') !== false) - { - $signed = true; - } - else - { - $signed = false; - } - } - return $signed; - } - - - /** - * Verify the request if it seemed to be signed. 
- * - * @param string token_type the kind of token needed, defaults to 'access' - * @exception OAuthException thrown when the request did not verify - * @return boolean true when signed, false when not signed - */ - public function verifyIfSigned ( $token_type = 'access' ) - { - if ($this->getParam('oauth_consumer_key')) - { - OAuthRequestLogger::start($this); - $this->verify($token_type); - $signed = true; - OAuthRequestLogger::flush(); - } - else - { - $signed = false; - } - return $signed; - } - - - /** - * Verify the request - * - * @param string token_type the kind of token needed, defaults to 'access' (false, 'access', 'request') - * @exception OAuthException thrown when the request did not verify - * @return int user_id associated with token (false when no user associated) - */ - public function verify ( $token_type = 'access' ) - { - $consumer_key = $this->getParam('oauth_consumer_key'); - $token = $this->getParam('oauth_token'); - $user_id = false; - - if ($consumer_key && ($token_type === false || $token)) - { - $secrets = $this->store->getSecretsForVerify( $this->urldecode($consumer_key), - $this->urldecode($token), - $token_type); - - $this->store->checkServerNonce( $this->urldecode($consumer_key), - $this->urldecode($token), - $this->getParam('oauth_timestamp', true), - $this->getParam('oauth_nonce', true)); - - $oauth_sig = $this->getParam('oauth_signature'); - if (empty($oauth_sig)) - { - throw new OAuthException('Verification of signature failed (no oauth_signature in request).'); - } - - try - { - $this->verifySignature($secrets['consumer_secret'], $secrets['token_secret'], $token_type); - } - catch (OAuthException $e) - { - throw new OAuthException('Verification of signature failed (signature base string was "'.$this->signatureBaseString().'").'); - } - - // Check the optional body signature - if ($this->getParam('xoauth_body_signature')) - { - $method = $this->getParam('xoauth_body_signature_method'); - if (empty($method)) - { - $method = 
$this->getParam('oauth_signature_method'); - } - - try - { - $this->verifyDataSignature($this->getBody(), $secrets['consumer_secret'], $secrets['token_secret'], $method, $this->getParam('xoauth_body_signature')); - } - catch (OAuthException $e) - { - throw new OAuthException('Verification of body signature failed.'); - } - } - - // All ok - fetch the user associated with this request - if (isset($secrets['user_id'])) - { - $user_id = $secrets['user_id']; - } - - // Check if the consumer wants us to reset the ttl of this token - $ttl = $this->getParam('xoauth_token_ttl', true); - if (is_numeric($ttl)) - { - $this->store->setConsumerAccessTokenTtl($this->urldecode($token), $ttl); - } - } - else - { - throw new OAuthException('Can\'t verify request, missing oauth_consumer_key or oauth_token'); - } - return $user_id; - } - - - - /** - * Verify the signature of the request, using the method in oauth_signature_method. - * The signature is returned encoded in the form as used in the url. So the base64 and - * urlencoding has been done. 
- * - * @param string consumer_secret - * @param string token_secret - * @exception OAuthException thrown when the signature method is unknown - * @exception OAuthException when not all parts available - * @exception OAuthException when signature does not match - */ - public function verifySignature ( $consumer_secret, $token_secret, $token_type = 'access' ) - { - $required = array( - 'oauth_consumer_key', - 'oauth_signature_method', - 'oauth_timestamp', - 'oauth_nonce', - 'oauth_signature' - ); - - if ($token_type !== false) - { - $required[] = 'oauth_token'; - } - - foreach ($required as $req) - { - if (!isset($this->param[$req])) - { - throw new OAuthException('Can\'t verify request signature, missing parameter "'.$req.'"'); - } - } - - $this->checks(); - - $base = $this->signatureBaseString(); - $this->verifyDataSignature($base, $consumer_secret, $token_secret, $this->param['oauth_signature_method'], $this->param['oauth_signature']); - } - - - - /** - * Verify the signature of a string. 
- * - * @param string data - * @param string consumer_secret - * @param string token_secret - * @param string signature_method - * @param string signature - * @exception OAuthException thrown when the signature method is unknown - * @exception OAuthException when signature does not match - */ - public function verifyDataSignature ( $data, $consumer_secret, $token_secret, $signature_method, $signature ) - { - if (is_null($data)) - { - $data = ''; - } - - $sig = $this->getSignatureMethod($signature_method); - if (!$sig->verify($this, $data, $consumer_secret, $token_secret, $signature)) - { - throw new OAuthException('Signature verification failed ('.$signature_method.')'); - } - } - -} - - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/OAuthRequester.php b/mod/oauth_api/vendors/oauth/library/OAuthRequester.php deleted file mode 100644 index 87f9586c0..000000000 --- a/mod/oauth_api/vendors/oauth/library/OAuthRequester.php +++ /dev/null @@ -1,508 +0,0 @@ - - * @date Nov 20, 2007 1:41:38 PM - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -require_once dirname(__FILE__) . '/OAuthRequestSigner.php'; -require_once dirname(__FILE__) . '/body/OAuthBodyContentDisposition.php'; - - -class OAuthRequester extends OAuthRequestSigner -{ - protected $files; - - /** - * Construct a new request signer. Perform the request with the doRequest() method below. - * - * A request can have either one file or a body, not both. - * - * The files array consists of arrays: - * - file the filename/path containing the data for the POST/PUT - * - data data for the file, omit when you have a file - * - mime content-type of the file - * - filename filename for content disposition header - * - * When OAuth (and PHP) can support multipart/form-data then we can handle more than one file. - * For now max one file, with all the params encoded in the query string. - * - * @param string request - * @param string method http method. GET, PUT, POST etc. 
- * @param array params name=>value array with request parameters - * @param string body optional body to send - * @param array files optional files to send (max 1 till OAuth support multipart/form-data posts) - */ - function __construct ( $request, $method = 'GET', $params = null, $body = null, $files = null ) - { - parent::__construct($request, $method, $params, $body); - - // When there are files, then we can construct a POST with a single file - if (!empty($files)) - { - $empty = true; - foreach ($files as $f) - { - $empty = $empty && empty($f['file']) && !isset($f['data']); - } - - if (!$empty) - { - if (!is_null($body)) - { - throw new OAuthException('When sending files, you can\'t send a body as well.'); - } - $this->files = $files; - } - } - } - - - /** - * Perform the request, returns the response code, headers and body. - * - * @param int usr_id optional user id for which we make the request - * @param array curl_options optional extra options for curl request - * @param array options options like name and token_ttl - * @exception OAuthException when authentication not accepted - * @exception OAuthException when signing was not possible - * @return array (code=>int, headers=>array(), body=>string) - */ - function doRequest ( $usr_id = 0, $curl_options = array(), $options = array() ) - { - $name = isset($options['name']) ? $options['name'] : ''; - if (isset($options['token_ttl'])) - { - $this->setParam('xoauth_token_ttl', intval($options['token_ttl'])); - } - - if (!empty($this->files)) - { - // At the moment OAuth does not support multipart/form-data, so try to encode - // the supplied file (or data) as the request body and add a content-disposition header. 
- list($extra_headers, $body) = OAuthBodyContentDisposition::encodeBody($this->files); - $this->setBody($body); - $curl_options = $this->prepareCurlOptions($curl_options, $extra_headers); - } - $this->sign($usr_id, null, $name); - $text = $this->curl_raw($curl_options); - $result = $this->curl_parse($text); - if ($result['code'] >= 400) - { - throw new OAuthException('Request failed with code ' . $result['code'] . ': ' . $result['body']); - } - - // Record the token time to live for this server access token, immediate delete iff ttl <= 0 - // Only done on a succesful request. - $token_ttl = $this->getParam('xoauth_token_ttl', false); - if (is_numeric($token_ttl)) - { - $this->store->setServerTokenTtl($this->getParam('oauth_consumer_key',true), $this->getParam('oauth_token',true), $token_ttl); - } - - return $result; - } - - - /** - * Request a request token from the site belonging to consumer_key - * - * @param string consumer_key - * @param int usr_id - * @param array params (optional) extra arguments for when requesting the request token - * @param string method (optional) change the method of the request, defaults to POST (as it should be) - * @param array options (optional) options like name and token_ttl - * @exception OAuthException when no key could be fetched - * @exception OAuthException when no server with consumer_key registered - * @return array (authorize_uri, token) - */ - static function requestRequestToken ( $consumer_key, $usr_id, $params = null, $method = 'POST', $options = array() ) - { - OAuthRequestLogger::start(); - - if (isset($options['token_ttl']) && is_numeric($options['token_ttl'])) - { - $params['xoauth_token_ttl'] = intval($options['token_ttl']); - } - - $store = OAuthStore::instance(); - $r = $store->getServer($consumer_key, $usr_id); - $uri = $r['request_token_uri']; - - $oauth = new OAuthRequester($uri, $method, $params); - $oauth->sign($usr_id, $r); - $text = $oauth->curl_raw(); - - if (empty($text)) - { - throw new 
OAuthException('No answer from the server "'.$uri.'" while requesting a request token'); - } - $data = $oauth->curl_parse($text); - if ($data['code'] != 200) - { - throw new OAuthException('Unexpected result from the server "'.$uri.'" ('.$data['code'].') while requesting a request token'); - } - $token = array(); - $params = explode('&', $data['body']); - foreach ($params as $p) - { - @list($name, $value) = explode('=', $p, 2); - $token[$name] = $oauth->urldecode($value); - } - - if (!empty($token['oauth_token']) && !empty($token['oauth_token_secret'])) - { - $opts = array(); - if (isset($options['name'])) - { - $opts['name'] = $options['name']; - } - if (isset($token['xoauth_token_ttl'])) - { - $opts['token_ttl'] = $token['xoauth_token_ttl']; - } - $store->addServerToken($consumer_key, 'request', $token['oauth_token'], $token['oauth_token_secret'], $usr_id, $opts); - } - else - { - throw new OAuthException('The server "'.$uri.'" did not return the oauth_token or the oauth_token_secret'); - } - - OAuthRequestLogger::flush(); - - // Now we can direct a browser to the authorize_uri - return array( - 'authorize_uri' => $r['authorize_uri'], - 'token' => $token['oauth_token'] - ); - } - - - /** - * Request an access token from the site belonging to consumer_key. - * Before this we got an request token, now we want to exchange it for - * an access token. 
- * - * @param string consumer_key - * @param string token - * @param int usr_id user requesting the access token - * @param string method (optional) change the method of the request, defaults to POST (as it should be) - * @param array options (optional) extra options for request, eg token_ttl - * @exception OAuthException when no key could be fetched - * @exception OAuthException when no server with consumer_key registered - */ - static function requestAccessToken ( $consumer_key, $token, $usr_id, $method = 'POST', $options = array() ) - { - OAuthRequestLogger::start(); - - $store = OAuthStore::instance(); - $r = $store->getServerTokenSecrets($consumer_key, $token, 'request', $usr_id); - $uri = $r['access_token_uri']; - $token_name = $r['token_name']; - - // Delete the server request token, this one was for one use only - $store->deleteServerToken($consumer_key, $r['token'], 0, true); - - // Try to exchange our request token for an access token - $oauth = new OAuthRequester($uri, $method); - - if (isset($options['token_ttl']) && is_numeric($options['token_ttl'])) - { - $oauth->setParam('xoauth_token_ttl', intval($options['token_ttl'])); - } - - OAuthRequestLogger::setRequestObject($oauth); - - $oauth->sign($usr_id, $r); - $text = $oauth->curl_raw(); - if (empty($text)) - { - throw new OAuthException('No answer from the server "'.$uri.'" while requesting a request token'); - } - $data = $oauth->curl_parse($text); - - if ($data['code'] != 200) - { - throw new OAuthException('Unexpected result from the server "'.$uri.'" ('.$data['code'].') while requesting a request token'); - } - - $token = array(); - $params = explode('&', $data['body']); - foreach ($params as $p) - { - @list($name, $value) = explode('=', $p, 2); - $token[$oauth->urldecode($name)] = $oauth->urldecode($value); - } - - if (!empty($token['oauth_token']) && !empty($token['oauth_token_secret'])) - { - $opts = array(); - $opts['name'] = $token_name; - if (isset($token['xoauth_token_ttl'])) - { - 
$opts['token_ttl'] = $token['xoauth_token_ttl']; - } - $store->addServerToken($consumer_key, 'access', $token['oauth_token'], $token['oauth_token_secret'], $usr_id, $opts); - } - else - { - throw new OAuthException('The server "'.$uri.'" did not return the oauth_token or the oauth_token_secret'); - } - - OAuthRequestLogger::flush(); - } - - - - /** - * Open and close a curl session passing all the options to the curl libs - * - * @param string url the http address to fetch - * @exception OAuthException when temporary file for PUT operation could not be created - * @return string the result of the curl action - */ - protected function curl_raw ( $opts = array() ) - { - if (isset($opts[CURLOPT_HTTPHEADER])) - { - $header = $opts[CURLOPT_HTTPHEADER]; - } - else - { - $header = array(); - } - - $ch = curl_init(); - $method = $this->getMethod(); - $url = $this->getRequestUrl(); - $header[] = $this->getAuthorizationHeader(); - $query = $this->getQueryString(); - $body = $this->getBody(); - - $has_content_type = false; - foreach ($header as $h) - { - if (strncasecmp($h, 'Content-Type:', 13) == 0) - { - $has_content_type = true; - } - } - - if (!is_null($body)) - { - if ($method == 'TRACE') - { - throw new OAuthException('A body can not be sent with a TRACE operation'); - } - - // PUT and POST allow a request body - if (!empty($query)) - { - $url .= '?'.$query; - } - - // Make sure that the content type of the request is ok - if (!$has_content_type) - { - $header[] = 'Content-Type: application/octet-stream'; - $has_content_type = true; - } - - // When PUTting, we need to use an intermediate file (because of the curl implementation) - if ($method == 'PUT') - { - /* - if (version_compare(phpversion(), '5.2.0') >= 0) - { - // Use the data wrapper to create the file expected by the put method - $put_file = fopen('data://application/octet-stream;base64,'.base64_encode($body)); - } - */ - - $put_file = @tmpfile(); - if (!$put_file) - { - throw new OAuthException('Could not 
create tmpfile for PUT operation'); - } - fwrite($put_file, $body); - fseek($put_file, 0); - - curl_setopt($ch, CURLOPT_PUT, true); - curl_setopt($ch, CURLOPT_INFILE, $put_file); - curl_setopt($ch, CURLOPT_INFILESIZE, strlen($body)); - } - else - { - curl_setopt($ch, CURLOPT_POST, true); - curl_setopt($ch, CURLOPT_POSTFIELDS, $body); - } - } - else - { - // a 'normal' request, no body to be send - if ($method == 'POST') - { - if (!$has_content_type) - { - $header[] = 'Content-Type: application/x-www-form-urlencoded'; - $has_content_type = true; - } - - curl_setopt($ch, CURLOPT_POST, true); - curl_setopt($ch, CURLOPT_POSTFIELDS, $query); - } - else - { - if (!empty($query)) - { - $url .= '?'.$query; - } - if ($method != 'GET') - { - curl_setopt($ch, CURLOPT_CUSTOMREQUEST, $method); - } - } - } - - curl_setopt($ch, CURLOPT_HTTPHEADER, $header); - curl_setopt($ch, CURLOPT_USERAGENT, 'anyMeta/OAuth 1.0 - ($LastChangedRevision: 63 $)'); - curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_HEADER, true); - - foreach ($opts as $k => $v) - { - if ($k != CURLOPT_HTTPHEADER) - { - curl_setopt($ch, $k, $v); - } - } - - $txt = curl_exec($ch); - curl_close($ch); - - if (!empty($put_file)) - { - fclose($put_file); - } - - // Tell the logger what we requested and what we received back - $data = $method . 
" $url\n".implode("\n",$header); - if (is_string($body)) - { - $data .= "\n\n".$body; - } - else if ($method == 'POST') - { - $data .= "\n\n".$query; - } - - OAuthRequestLogger::setSent($data, $body); - OAuthRequestLogger::setReceived($txt); - - return $txt; - } - - - /** - * Parse an http response - * - * @param string response the http text to parse - * @return array (code=>http-code, headers=>http-headers, body=>body) - */ - protected function curl_parse ( $response ) - { - if (empty($response)) - { - return array(); - } - - @list($headers,$body) = explode("\r\n\r\n",$response,2); - $lines = explode("\r\n",$headers); - - if (preg_match('@^HTTP/[0-9]\.[0-9] +100@', $lines[0])) - { - /* HTTP/1.x 100 Continue - * the real data is on the next line - */ - @list($headers,$body) = explode("\r\n\r\n",$body,2); - $lines = explode("\r\n",$headers); - } - - // first line of headers is the HTTP response code - $http_line = array_shift($lines); - if (preg_match('@^HTTP/[0-9]\.[0-9] +([0-9]{3})@', $http_line, $matches)) - { - $code = $matches[1]; - } - - // put the rest of the headers in an array - $headers = array(); - foreach ($lines as $l) - { - list($k, $v) = explode(': ', $l, 2); - $headers[strtolower($k)] = $v; - } - - return array( 'code' => $code, 'headers' => $headers, 'body' => $body); - } - - - /** - * Mix the given headers into the headers that were given to curl - * - * @param array curl_options - * @param array extra_headers - * @return array new curl options - */ - protected function prepareCurlOptions ( $curl_options, $extra_headers ) - { - $hs = array(); - if (!empty($curl_options[CURLOPT_HTTPHEADER]) && is_array($curl_options[CURLOPT_HTTPHEADER])) - { - foreach ($curl_options[CURLOPT_HTTPHEADER] as $h) - { - list($opt, $val) = explode(':', $h, 2); - $opt = str_replace(' ', '-', ucwords(str_replace('-', ' ', $opt))); - $hs[$opt] = $val; - } - } - - $curl_options[CURLOPT_HTTPHEADER] = array(); - $hs = array_merge($hs, $extra_headers); - foreach ($hs as $h => 
$v) - { - $curl_options[CURLOPT_HTTPHEADER][] = "$h: $v"; - } - return $curl_options; - } -} - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/OAuthServer.php b/mod/oauth_api/vendors/oauth/library/OAuthServer.php deleted file mode 100644 index c7f9097b3..000000000 --- a/mod/oauth_api/vendors/oauth/library/OAuthServer.php +++ /dev/null @@ -1,232 +0,0 @@ - - * @date Nov 27, 2007 12:36:38 PM - * - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -require_once 'OAuthRequestVerifier.php'; - -class OAuthServer extends OAuthRequestVerifier -{ - /** - * Handle the request_token request. - * Returns the new request token and request token secret. 
- * - * TODO: add correct result code to exception - * - * @return string returned request token, false on an error - */ - public function requestToken () - { - OAuthRequestLogger::start($this); - try - { - $this->verify(false); - - $options = array(); - $ttl = $this->getParam('xoauth_token_ttl', false); - if ($ttl) - { - $options['token_ttl'] = $ttl; - } - - // Create a request token - $store = OAuthStore::instance(); - $token = $store->addConsumerRequestToken($this->getParam('oauth_consumer_key', true), $options); - $result = 'oauth_token='.$this->urlencode($token['token']) - .'&oauth_token_secret='.$this->urlencode($token['token_secret']); - - if (!empty($token['token_ttl'])) - { - $result .= '&xoauth_token_ttl='.$this->urlencode($token['token_ttl']); - } - - $request_token = $token['token']; - - header('HTTP/1.1 200 OK'); - header('Content-Length: '.strlen($result)); - header('Content-Type: application/x-www-form-urlencoded'); - - echo $result; - } - catch (OAuthException $e) - { - $request_token = false; - - header('HTTP/1.1 401 Unauthorized'); - header('Content-Type: text/plain'); - - echo "OAuth Verification Failed: " . $e->getMessage(); - } - - OAuthRequestLogger::flush(); - return $request_token; - } - - - /** - * Verify the start of an authorization request. Verifies if the request token is valid. 
- * Next step is the method authorizeFinish() - * - * Nota bene: this stores the current token, consumer key and callback in the _SESSION - * - * @exception OAuthException thrown when not a valid request - * @return array token description - */ - public function authorizeVerify ( ) - { - OAuthRequestLogger::start($this); - - $store = OAuthStore::instance(); - $token = $this->getParam('oauth_token', true); - $rs = $store->getConsumerRequestToken($token); - if (empty($rs)) - { - throw new OAuthException('Unknown request token "'.$token.'"'); - } - - // We need to remember the callback - if ( empty($_SESSION['verify_oauth_token']) - || strcmp($_SESSION['verify_oauth_token'], $rs['token'])) - { - $_SESSION['verify_oauth_token'] = $rs['token']; - $_SESSION['verify_oauth_consumer_key'] = $rs['consumer_key']; - $_SESSION['verify_oauth_callback'] = $this->getParam('oauth_callback', true); - } - OAuthRequestLogger::flush(); - return $rs; - } - - - /** - * Overrule this method when you want to display a nice page when - * the authorization is finished. This function does not know if the authorization was - * succesfull, you need to check the token in the database. 
- * - * @param boolean authorized if the current token (oauth_token param) is authorized or not - * @param int user_id user for which the token was authorized (or denied) - */ - public function authorizeFinish ( $authorized, $user_id ) - { - OAuthRequestLogger::start($this); - - $token = $this->getParam('oauth_token', true); - if ( isset($_SESSION['verify_oauth_token']) - && $_SESSION['verify_oauth_token'] == $token) - { - // Flag the token as authorized, or remove the token when not authorized - $store = OAuthStore::instance(); - - // Fetch the referrer host from the oauth callback parameter - $referrer_host = ''; - $oauth_callback = false; - if (!empty($_SESSION['verify_oauth_callback'])) - { - $oauth_callback = $_SESSION['verify_oauth_callback']; - $ps = parse_url($oauth_callback); - if (isset($ps['host'])) - { - $referrer_host = $ps['host']; - } - } - - if ($authorized) - { - OAuthRequestLogger::addNote('Authorized token "'.$token.'" for user '.$user_id.' with referrer "'.$referrer_host.'"'); - $store->authorizeConsumerRequestToken($token, $user_id, $referrer_host); - } - else - { - OAuthRequestLogger::addNote('Authorization rejected for token "'.$token.'" for user '.$user_id."\nToken has been deleted"); - $store->deleteConsumerRequestToken($token); - } - - if (!empty($oauth_callback)) - { - $this->redirect($oauth_callback, array('oauth_token'=>rawurlencode($token))); - } - } - OAuthRequestLogger::flush(); - } - - - /** - * Exchange a request token for an access token. - * The exchange is only succesful iff the request token has been authorized. - * - * Never returns, calls exit() when token is exchanged or when error is returned. 
- */ - public function accessToken () - { - OAuthRequestLogger::start($this); - - try - { - $this->verify('request'); - - $options = array(); - $ttl = $this->getParam('xoauth_token_ttl', false); - if ($ttl) - { - $options['token_ttl'] = $ttl; - } - - $store = OAuthStore::instance(); - $token = $store->exchangeConsumerRequestForAccessToken($this->getParam('oauth_token', true), $options); - $result = 'oauth_token='.$this->urlencode($token['token']) - .'&oauth_token_secret='.$this->urlencode($token['token_secret']); - - if (!empty($token['token_ttl'])) - { - $result .= '&xoauth_token_ttl='.$this->urlencode($token['token_ttl']); - } - - header('HTTP/1.1 200 OK'); - header('Content-Length: '.strlen($result)); - header('Content-Type: application/x-www-form-urlencoded'); - - echo $result; - } - catch (OAuthException $e) - { - header('HTTP/1.1 401 Access Denied'); - header('Content-Type: text/plain'); - - echo "OAuth Verification Failed: " . $e->getMessage(); - } - - OAuthRequestLogger::flush(); - exit(); - } -} - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/OAuthStore.php b/mod/oauth_api/vendors/oauth/library/OAuthStore.php deleted file mode 100644 index 1841ab5fa..000000000 --- a/mod/oauth_api/vendors/oauth/library/OAuthStore.php +++ /dev/null @@ -1,86 +0,0 @@ - - * @date Nov 16, 2007 4:03:30 PM - * - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be 
included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -require_once dirname(__FILE__) . '/OAuthException.php'; - -class OAuthStore -{ - static private $instance = false; - - /** - * Request an instance of the OAuthStore - */ - public static function instance ( $store = 'MySQL', $options = array() ) - { - if (!OAuthStore::$instance) - { - // Select the store you want to use - if (strpos($store, '/') === false) - { - $class = 'OAuthStore'.$store; - $file = dirname(__FILE__) . '/store/'.$class.'.php'; - } - else - { - $file = $store; - $store = basename($file, '.php'); - $class = $store; - } - - if (is_file($file)) - { - require_once $file; - - if (class_exists($class)) - { - OAuthStore::$instance = new $class($options); - } - else - { - throw new OAuthException('Could not find class '.$class.' in file '.$file); - } - } - else - { - throw new OAuthException('No OAuthStore for '.$store.' 
(file '.$file.')'); - } - } - return OAuthStore::$instance; - } -} - - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/body/OAuthBodyContentDisposition.php b/mod/oauth_api/vendors/oauth/library/body/OAuthBodyContentDisposition.php deleted file mode 100644 index 84123b6d0..000000000 --- a/mod/oauth_api/vendors/oauth/library/body/OAuthBodyContentDisposition.php +++ /dev/null @@ -1,129 +0,0 @@ - - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -class OAuthBodyContentDisposition -{ - /** - * Builds the request string. 
- * - * The files array can be a combination of the following (either data or file): - * - * file => "path/to/file", filename=, mime=, data= - * - * @param array files (name => filedesc) (not urlencoded) - * @return array (headers, body) - */ - static function encodeBody ( $files ) - { - $headers = array(); - $body = null; - - // 1. Add all the files to the post - if (!empty($files)) - { - foreach ($files as $name => $f) - { - $data = false; - $filename = false; - - if (isset($f['filename'])) - { - $filename = $f['filename']; - } - - if (!empty($f['file'])) - { - $data = @file_get_contents($f['file']); - if ($data === false) - { - throw new OAuthException(sprintf('Could not read the file "%s" for request body', $f['file'])); - } - if (empty($filename)) - { - $filename = basename($f['file']); - } - } - else if (isset($f['data'])) - { - $data = $f['data']; - } - - // When there is data, add it as a request body, otherwise silently skip the upload - if ($data !== false) - { - if (isset($headers['Content-Disposition'])) - { - throw new OAuthException('Only a single file (or data) allowed in a signed PUT/POST request body.'); - } - - if (empty($filename)) - { - $filename = 'untitled'; - } - $mime = !empty($f['mime']) ? $f['mime'] : 'application/octet-stream'; - - $headers['Content-Disposition'] = 'attachment; filename="'.OAuthBodyContentDisposition::encodeParameterName($filename).'"'; - $headers['Content-Type'] = $mime; - - $body = $data; - } - - } - - // When we have a body, add the content-length - if (!is_null($body)) - { - $headers['Content-Length'] = strlen($body); - } - } - return array($headers, $body); - } - - - /** - * Encode a parameter's name for use in a multipart header. - * For now we do a simple filter that removes some unwanted characters. - * We might want to implement RFC1522 here. 
See http://tools.ietf.org/html/rfc1522 - * - * @param string name - * @return string - */ - static function encodeParameterName ( $name ) - { - return preg_replace('/[^\x20-\x7f]|"/', '-', $name); - } -} - - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/body/OAuthBodyMultipartFormdata.php b/mod/oauth_api/vendors/oauth/library/body/OAuthBodyMultipartFormdata.php deleted file mode 100644 index 048fdeb63..000000000 --- a/mod/oauth_api/vendors/oauth/library/body/OAuthBodyMultipartFormdata.php +++ /dev/null @@ -1,143 +0,0 @@ - - * @date Jan 31, 2008 12:50:05 PM - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - - -class OAuthBodyMultipartFormdata -{ - /** - * Builds the request string. 
- * - * The files array can be a combination of the following (either data or file): - * - * file => "path/to/file", filename=, mime=, data= - * - * @param array params (name => value) (all names and values should be urlencoded) - * @param array files (name => filedesc) (not urlencoded) - * @return array (headers, body) - */ - static function encodeBody ( $params, $files ) - { - $headers = array(); - $body = ''; - $boundary = 'OAuthRequester_'.md5(uniqid('multipart') . microtime()); - $headers['Content-Type'] = 'multipart/form-data; boundary=' . $boundary; - - - // 1. Add the parameters to the post - if (!empty($params)) - { - foreach ($params as $name => $value) - { - $body .= '--'.$boundary."\r\n"; - $body .= 'Content-Disposition: form-data; name="'.OAuthBodyMultipartFormdata::encodeParameterName(rawurldecode($name)).'"'; - $body .= "\r\n\r\n"; - $body .= urldecode($value); - $body .= "\r\n"; - } - } - - // 2. Add all the files to the post - if (!empty($files)) - { - $untitled = 1; - - foreach ($files as $name => $f) - { - $data = false; - $filename = false; - - if (isset($f['filename'])) - { - $filename = $f['filename']; - } - - if (!empty($f['file'])) - { - $data = @file_get_contents($f['file']); - if ($data === false) - { - throw new OAuthException(sprintf('Could not read the file "%s" for form-data part', $f['file'])); - } - if (empty($filename)) - { - $filename = basename($f['file']); - } - } - else if (isset($f['data'])) - { - $data = $f['data']; - } - - // When there is data, add it as a form-data part, otherwise silently skip the upload - if ($data !== false) - { - if (empty($filename)) - { - $filename = sprintf('untitled-%d', $untitled++); - } - $mime = !empty($f['mime']) ? 
$f['mime'] : 'application/octet-stream'; - $body .= '--'.$boundary."\r\n"; - $body .= 'Content-Disposition: form-data; name="'.OAuthBodyMultipartFormdata::encodeParameterName($name).'"; filename="'.OAuthBodyMultipartFormdata::encodeParameterName($filename).'"'."\r\n"; - $body .= 'Content-Type: '.$mime; - $body .= "\r\n\r\n"; - $body .= $data; - $body .= "\r\n"; - } - - } - } - $body .= '--'.$boundary."--\r\n"; - - $headers['Content-Length'] = strlen($body); - return array($headers, $body); - } - - - /** - * Encode a parameter's name for use in a multipart header. - * For now we do a simple filter that removes some unwanted characters. - * We might want to implement RFC1522 here. See http://tools.ietf.org/html/rfc1522 - * - * @param string name - * @return string - */ - static function encodeParameterName ( $name ) - { - return preg_replace('/[^\x20-\x7f]|"/', '-', $name); - } -} - - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/discovery/xrds_parse.php b/mod/oauth_api/vendors/oauth/library/discovery/xrds_parse.php deleted file mode 100644 index c9cf94997..000000000 --- a/mod/oauth_api/vendors/oauth/library/discovery/xrds_parse.php +++ /dev/null @@ -1,304 +0,0 @@ - - * - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -/* example of use: - -header('content-type: text/plain'); -$file = file_get_contents('../../test/discovery/xrds-magnolia.xrds'); -$xrds = xrds_parse($file); -print_r($xrds); - - */ - -/** - * Parse the xrds file in the argument. The xrds description must have been - * fetched via curl or something else. - * - * TODO: more robust checking, support for more service documents - * TODO: support for URIs to definition instead of local xml:id - * - * @param string data contents of xrds file - * @exception Exception when the file is in an unknown format - * @return array - */ -function xrds_parse ( $data ) -{ - $oauth = array(); - $doc = @DOMDocument::loadXML($data); - if ($doc === false) - { - throw new Exception('Error in XML, can\'t load XRDS document'); - } - - $xpath = new DOMXPath($doc); - $xpath->registerNamespace('xrds', 'xri://$xrds'); - $xpath->registerNamespace('xrd', 'xri://$XRD*($v*2.0)'); - $xpath->registerNamespace('simple', 'http://xrds-simple.net/core/1.0'); - - // Yahoo! 
uses this namespace, with lowercase xrd in it - $xpath->registerNamespace('xrd2', 'xri://$xrd*($v*2.0)'); - - $uris = xrds_oauth_service_uris($xpath); - - foreach ($uris as $uri) - { - // TODO: support uris referring to service documents outside this one - if ($uri{0} == '#') - { - $id = substr($uri, 1); - $oauth = xrds_xrd_oauth($xpath, $id); - if (is_array($oauth) && !empty($oauth)) - { - return $oauth; - } - } - } - - return false; -} - - -/** - * Parse a XRD definition for OAuth and return the uris etc. - * - * @param XPath xpath - * @param string id - * @return array - */ -function xrds_xrd_oauth ( $xpath, $id ) -{ - $oauth = array(); - $xrd = $xpath->query('//xrds:XRDS/xrd:XRD[@xml:id="'.$id.'"]'); - if ($xrd->length == 0) - { - // Yahoo! uses another namespace - $xrd = $xpath->query('//xrds:XRDS/xrd2:XRD[@xml:id="'.$id.'"]'); - } - - if ($xrd->length >= 1) - { - $x = $xrd->item(0); - $services = array(); - foreach ($x->childNodes as $n) - { - switch ($n->nodeName) - { - case 'Type': - if ($n->nodeValue != 'xri://$xrds*simple') - { - // Not a simple XRDS document - return false; - } - break; - case 'Expires': - $oauth['expires'] = $n->nodeValue; - break; - case 'Service': - list($type,$service) = xrds_xrd_oauth_service($n); - if ($type) - { - $services[$type][xrds_priority($n)][] = $service; - } - break; - } - } - - // Flatten the services on priority - foreach ($services as $type => $service) - { - $oauth[$type] = xrds_priority_flatten($service); - } - } - else - { - $oauth = false; - } - return $oauth; -} - - -/** - * Parse a service definition for OAuth in a simple xrd element - * - * @param DOMElement n - * @return array (type, service desc) - */ -function xrds_xrd_oauth_service ( $n ) -{ - $service = array( - 'uri' => '', - 'signature_method' => array(), - 'parameters' => array() - ); - - $type = false; - foreach ($n->childNodes as $c) - { - $name = $c->nodeName; - $value = $c->nodeValue; - - if ($name == 'URI') - { - $service['uri'] = $value; - } - else 
if ($name == 'Type') - { - if (strncmp($value, 'http://oauth.net/core/1.0/endpoint/', 35) == 0) - { - $type = basename($value); - } - else if (strncmp($value, 'http://oauth.net/core/1.0/signature/', 36) == 0) - { - $service['signature_method'][] = basename($value); - } - else if (strncmp($value, 'http://oauth.net/core/1.0/parameters/', 37) == 0) - { - $service['parameters'][] = basename($value); - } - else if (strncmp($value, 'http://oauth.net/discovery/1.0/consumer-identity/', 49) == 0) - { - $type = 'consumer_identity'; - $service['method'] = basename($value); - unset($service['signature_method']); - unset($service['parameters']); - } - else - { - $service['unknown'][] = $value; - } - } - else if ($name == 'LocalID') - { - $service['consumer_key'] = $value; - } - else if ($name{0} != '#') - { - $service[strtolower($name)] = $value; - } - } - return array($type, $service); -} - - -/** - * Return the OAuth service uris in order of the priority. - * - * @param XPath xpath - * @return array - */ -function xrds_oauth_service_uris ( $xpath ) -{ - $uris = array(); - $xrd_oauth = $xpath->query('//xrds:XRDS/xrd:XRD/xrd:Service/xrd:Type[.=\'http://oauth.net/discovery/1.0\']'); - if ($xrd_oauth->length > 0) - { - $service = array(); - foreach ($xrd_oauth as $xo) - { - // Find the URI of the service definition - $cs = $xo->parentNode->childNodes; - foreach ($cs as $c) - { - if ($c->nodeName == 'URI') - { - $prio = xrds_priority($xo); - $service[$prio][] = $c->nodeValue; - } - } - } - $uris = xrds_priority_flatten($service); - } - return $uris; -} - - - -/** - * Flatten an array according to the priority - * - * @param array ps buckets per prio - * @return array one dimensional array - */ -function xrds_priority_flatten ( $ps ) -{ - $prio = array(); - $null = array(); - ksort($ps); - foreach ($ps as $idx => $bucket) - { - if (!empty($bucket)) - { - if ($idx == 'null') - { - $null = $bucket; - } - else - { - $prio = array_merge($prio, $bucket); - } - } - } - $prio = 
array_merge($prio, $bucket); - return $prio; -} - - -/** - * Fetch the priority of a element - * - * @param DOMElement elt - * @return mixed 'null' or int - */ -function xrds_priority ( $elt ) -{ - if ($elt->hasAttribute('priority')) - { - $prio = $elt->getAttribute('priority'); - if (is_numeric($prio)) - { - $prio = intval($prio); - } - } - else - { - $prio = 'null'; - } - return $prio; -} - - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/discovery/xrds_parse.txt b/mod/oauth_api/vendors/oauth/library/discovery/xrds_parse.txt deleted file mode 100644 index fd867ea9f..000000000 --- a/mod/oauth_api/vendors/oauth/library/discovery/xrds_parse.txt +++ /dev/null @@ -1,101 +0,0 @@ -The xrds_parse.php script contains the function: - - function xrds_parse ( $data. ) - -$data Contains the contents of a XRDS XML file. -When the data is invalid XML then this will throw an exception. - -After parsing a XRDS definition it will return a datastructure much like the one below. 
- -Array -( - [expires] => 2008-04-13T07:34:58Z - [request] => Array - ( - [0] => Array - ( - [uri] => https://ma.gnolia.com/oauth/get_request_token - [signature_method] => Array - ( - [0] => HMAC-SHA1 - [1] => RSA-SHA1 - [2] => PLAINTEXT - ) - - [parameters] => Array - ( - [0] => auth-header - [1] => post-body - [2] => uri-query - ) - ) - ) - - [authorize] => Array - ( - [0] => Array - ( - [uri] => http://ma.gnolia.com/oauth/authorize - [signature_method] => Array - ( - ) - - [parameters] => Array - ( - [0] => auth-header - [1] => uri-query - ) - ) - ) - - [access] => Array - ( - [0] => Array - ( - [uri] => https://ma.gnolia.com/oauth/get_access_token - [signature_method] => Array - ( - [0] => HMAC-SHA1 - [1] => RSA-SHA1 - [2] => PLAINTEXT - ) - - [parameters] => Array - ( - [0] => auth-header - [1] => post-body - [2] => uri-query - ) - ) - ) - - [resource] => Array - ( - [0] => Array - ( - [uri] => - [signature_method] => Array - ( - [0] => HMAC-SHA1 - [1] => RSA-SHA1 - ) - - [parameters] => Array - ( - [0] => auth-header - [1] => post-body - [2] => uri-query - ) - ) - ) - - [consumer_identity] => Array - ( - [0] => Array - ( - [uri] => http://ma.gnolia.com/applications/new - [method] => oob - ) - ) -) - diff --git a/mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod.class.php b/mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod.class.php deleted file mode 100644 index 34ccb428c..000000000 --- a/mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod.class.php +++ /dev/null @@ -1,69 +0,0 @@ - - * @date Sep 8, 2008 12:04:35 PM - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -abstract class OAuthSignatureMethod -{ - /** - * Return the name of this signature - * - * @return string - */ - abstract public function name(); - - /** - * Return the signature for the given request - * - * @param OAuthRequest request - * @param string base_string - * @param string consumer_secret - * @param string token_secret - * @return string - */ - abstract public function signature ( $request, $base_string, $consumer_secret, $token_secret ); - - /** - * Check if the request signature corresponds to the one calculated for the request. 
- * - * @param OAuthRequest request - * @param string base_string data to be signed, usually the base string, can be a request body - * @param string consumer_secret - * @param string token_secret - * @param string signature from the request, still urlencoded - * @return string - */ - abstract public function verify ( $request, $base_string, $consumer_secret, $token_secret, $signature ); -} - - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_HMAC_SHA1.php b/mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_HMAC_SHA1.php deleted file mode 100644 index 4bc949c10..000000000 --- a/mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_HMAC_SHA1.php +++ /dev/null @@ -1,115 +0,0 @@ - - * @date Sep 8, 2008 12:21:19 PM - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - - -require_once dirname(__FILE__).'/OAuthSignatureMethod.class.php'; - - -class OAuthSignatureMethod_HMAC_SHA1 extends OAuthSignatureMethod -{ - public function name () - { - return 'HMAC-SHA1'; - } - - - /** - * Calculate the signature using HMAC-SHA1 - * This function is copyright Andy Smith, 2007. - * - * @param OAuthRequest request - * @param string base_string - * @param string consumer_secret - * @param string token_secret - * @return string - */ - function signature ( $request, $base_string, $consumer_secret, $token_secret ) - { - $key = $request->urlencode($consumer_secret).'&'.$request->urlencode($token_secret); - if (function_exists('hash_hmac')) - { - $signature = base64_encode(hash_hmac("sha1", $base_string, $key, true)); - } - else - { - $blocksize = 64; - $hashfunc = 'sha1'; - if (strlen($key) > $blocksize) - { - $key = pack('H*', $hashfunc($key)); - } - $key = str_pad($key,$blocksize,chr(0x00)); - $ipad = str_repeat(chr(0x36),$blocksize); - $opad = str_repeat(chr(0x5c),$blocksize); - $hmac = pack( - 'H*',$hashfunc( - ($key^$opad).pack( - 'H*',$hashfunc( - ($key^$ipad).$base_string - ) - ) - ) - ); - $signature = base64_encode($hmac); - } - return $request->urlencode($signature); - } - - - /** - * Check if the request signature corresponds to the one calculated for the request. 
- * - * @param OAuthRequest request - * @param string base_string data to be signed, usually the base string, can be a request body - * @param string consumer_secret - * @param string token_secret - * @param string signature from the request, still urlencoded - * @return string - */ - public function verify ( $request, $base_string, $consumer_secret, $token_secret, $signature ) - { - $a = $request->urldecode($signature); - $b = $request->urldecode($this->signature($request, $base_string, $consumer_secret, $token_secret)); - - // We have to compare the decoded values - $valA = base64_decode($a); - $valB = base64_decode($b); - - // Crude binary comparison - return rawurlencode($a) == rawurlencode($b); - } -} - - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_MD5.php b/mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_MD5.php deleted file mode 100644 index 6f593a47f..000000000 --- a/mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_MD5.php +++ /dev/null @@ -1,95 +0,0 @@ - - * @date Sep 8, 2008 12:09:43 PM - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -require_once dirname(__FILE__).'/OAuthSignatureMethod.class.php'; - - -class OAuthSignatureMethod_MD5 extends OAuthSignatureMethod -{ - public function name () - { - return 'MD5'; - } - - - /** - * Calculate the signature using MD5 - * Binary md5 digest, as distinct from PHP's built-in hexdigest. - * This function is copyright Andy Smith, 2007. - * - * @param OAuthRequest request - * @param string base_string - * @param string consumer_secret - * @param string token_secret - * @return string - */ - function signature ( $request, $base_string, $consumer_secret, $token_secret ) - { - $s .= '&'.$request->urlencode($consumer_secret).'&'.$request->urlencode($token_secret); - $md5 = md5($base_string); - $bin = ''; - - for ($i = 0; $i < strlen($md5); $i += 2) - { - $bin .= chr(hexdec($md5{$i+1}) + hexdec($md5{$i}) * 16); - } - return $request->urlencode(base64_encode($bin)); - } - - - /** - * Check if the request signature corresponds to the one calculated for the request. 
- * - * @param OAuthRequest request - * @param string base_string data to be signed, usually the base string, can be a request body - * @param string consumer_secret - * @param string token_secret - * @param string signature from the request, still urlencoded - * @return string - */ - public function verify ( $request, $base_string, $consumer_secret, $token_secret, $signature ) - { - $a = $request->urldecode($signature); - $b = $request->urldecode($this->signature($request, $base_string, $consumer_secret, $token_secret)); - - // We have to compare the decoded values - $valA = base64_decode($a); - $valB = base64_decode($b); - - // Crude binary comparison - return rawurlencode($a) == rawurlencode($b); - } -} - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_PLAINTEXT.php b/mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_PLAINTEXT.php deleted file mode 100644 index 92ef30867..000000000 --- a/mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_PLAINTEXT.php +++ /dev/null @@ -1,80 +0,0 @@ - - * @date Sep 8, 2008 12:09:43 PM - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -require_once dirname(__FILE__).'/OAuthSignatureMethod.class.php'; - - -class OAuthSignatureMethod_PLAINTEXT extends OAuthSignatureMethod -{ - public function name () - { - return 'PLAINTEXT'; - } - - - /** - * Calculate the signature using PLAINTEXT - * - * @param OAuthRequest request - * @param string base_string - * @param string consumer_secret - * @param string token_secret - * @return string - */ - function signature ( $request, $base_string, $consumer_secret, $token_secret ) - { - return $request->urlencode($request->urlencode($consumer_secret).'&'.$request->urlencode($token_secret)); - } - - - /** - * Check if the request signature corresponds to the one calculated for the request. 
- * - * @param OAuthRequest request - * @param string base_string data to be signed, usually the base string, can be a request body - * @param string consumer_secret - * @param string token_secret - * @param string signature from the request, still urlencoded - * @return string - */ - public function verify ( $request, $base_string, $consumer_secret, $token_secret, $signature ) - { - $a = $request->urldecode($signature); - $b = $request->urldecode($this->signature($request, $base_string, $consumer_secret, $token_secret)); - - return $request->urldecode($a) == $request->urldecode($b); - } -} - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_RSA_SHA1.php b/mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_RSA_SHA1.php deleted file mode 100644 index 3bbde7d90..000000000 --- a/mod/oauth_api/vendors/oauth/library/signature_method/OAuthSignatureMethod_RSA_SHA1.php +++ /dev/null @@ -1,136 +0,0 @@ - - * @date Sep 8, 2008 12:00:14 PM - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -class OAuthSignatureMethod_RSA_SHA1 extends OAuthSignatureMethod -{ - public function name() - { - return 'RSA-SHA1'; - } - - - /** - * Fetch the public CERT key for the signature - * - * @param OAuthRequest request - * @return string public key - */ - protected function fetch_public_cert ( $request ) - { - // not implemented yet, ideas are: - // (1) do a lookup in a table of trusted certs keyed off of consumer - // (2) fetch via http using a url provided by the requester - // (3) some sort of specific discovery code based on request - // - // either way should return a string representation of the certificate - throw OAuthException("OAuthSignatureMethod_RSA_SHA1::fetch_public_cert not implemented"); - } - - - /** - * Fetch the private CERT key for the signature - * - * @param OAuthRequest request - * @return string private key - */ - protected function fetch_private_cert ( $request ) - { - // not implemented yet, ideas are: - // (1) do a lookup in a table of trusted certs keyed off of consumer - // - // either way should return a string representation of the certificate - throw OAuthException("OAuthSignatureMethod_RSA_SHA1::fetch_private_cert not implemented"); - } - - - /** - * Calculate the signature using RSA-SHA1 - * This function is copyright Andy Smith, 2008. 
- * - * @param OAuthRequest request - * @param string base_string - * @param string consumer_secret - * @param string token_secret - * @return string - */ - public function signature ( $request, $base_string, $consumer_secret, $token_secret ) - { - // Fetch the private key cert based on the request - $cert = $this->fetch_private_cert($request); - - // Pull the private key ID from the certificate - $privatekeyid = openssl_get_privatekey($cert); - - // Sign using the key - $sig = false; - $ok = openssl_sign($base_string, $sig, $privatekeyid); - - // Release the key resource - openssl_free_key($privatekeyid); - - return $request->urlencode(base64_encode($sig)); - } - - - /** - * Check if the request signature is the same as the one calculated for the request. - * - * @param OAuthRequest request - * @param string base_string - * @param string consumer_secret - * @param string token_secret - * @param string signature - * @return string - */ - public function verify ( $request, $base_string, $consumer_secret, $token_secret, $signature ) - { - $decoded_sig = base64_decode($request->urldecode($signature)); - - // Fetch the public key cert based on the request - $cert = $this->fetch_public_cert($request); - - // Pull the public key ID from the certificate - $publickeyid = openssl_get_publickey($cert); - - // Check the computed signature against the one passed in the query - $ok = openssl_verify($base_string, $decoded_sig, $publickeyid); - - // Release the key resource - openssl_free_key($publickeyid); - return $ok == 1; - } - -} - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/store/OAuthStoreAbstract.class.php b/mod/oauth_api/vendors/oauth/library/store/OAuthStoreAbstract.class.php deleted file mode 100644 index e7cca981a..000000000 --- a/mod/oauth_api/vendors/oauth/library/store/OAuthStoreAbstract.class.php +++ /dev/null @@ -1,149 +0,0 @@ - - * - * The MIT License - * - * Copyright (c) 
2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -abstract class OAuthStoreAbstract -{ - abstract public function getSecretsForVerify ( $consumer_key, $token, $token_type = 'access' ); - abstract public function getSecretsForSignature ( $uri, $user_id ); - abstract public function getServerTokenSecrets ( $consumer_key, $token, $token_type, $user_id, $name = '' ); - abstract public function addServerToken ( $consumer_key, $token_type, $token, $token_secret, $user_id, $options = array() ); - - abstract public function deleteServer ( $consumer_key, $user_id, $user_is_admin = false ); - abstract public function getServer( $consumer_key, $user_id, $user_is_admin = false ); - abstract public function getServerForUri ( $uri, $user_id ); - abstract public function listServerTokens ( $user_id ); - abstract public function countServerTokens ( $consumer_key ); - abstract public function getServerToken ( $consumer_key, $token, $user_id ); - abstract public function deleteServerToken ( $consumer_key, $token, $user_id, $user_is_admin = false ); - abstract public function listServers ( $q = '', $user_id ); - abstract public function updateServer ( $server, $user_id, $user_is_admin = false ); - - abstract public function updateConsumer ( $consumer, $user_id, $user_is_admin = false ); - abstract public function deleteConsumer ( $consumer_key, $user_id, $user_is_admin = false ); - abstract public function getConsumer ( $consumer_key, $user_id, $user_is_admin = false ); - abstract public function getConsumerStatic (); - - abstract public function addConsumerRequestToken ( $consumer_key, $options = array() ); - abstract public function getConsumerRequestToken ( $token ); - abstract public function deleteConsumerRequestToken ( $token ); - abstract public function authorizeConsumerRequestToken ( $token, $user_id, $referrer_host = '' ); - abstract public function countConsumerAccessTokens ( $consumer_key ); - abstract public function exchangeConsumerRequestForAccessToken ( $token, $options = array() ); - abstract public function 
getConsumerAccessToken ( $token, $user_id ); - abstract public function deleteConsumerAccessToken ( $token, $user_id, $user_is_admin = false ); - abstract public function setConsumerAccessTokenTtl ( $token, $ttl ); - - abstract public function listConsumers ( $user_id ); - abstract public function listConsumerTokens ( $user_id ); - - abstract public function checkServerNonce ( $consumer_key, $token, $timestamp, $nonce ); - - abstract public function addLog ( $keys, $received, $sent, $base_string, $notes, $user_id = null ); - abstract public function listLog ( $options, $user_id ); - - abstract public function install (); - - /** - * Fetch the current static consumer key for this site, create it when it was not found. - * The consumer secret for the consumer key is always empty. - * - * @return string consumer key - */ - - - /* ** Some handy utility functions ** */ - - /** - * Generate a unique key - * - * @param boolean unique force the key to be unique - * @return string - */ - public function generateKey ( $unique = false ) - { - $key = md5(uniqid(rand(), true)); - if ($unique) - { - list($usec,$sec) = explode(' ',microtime()); - $key .= dechex($usec).dechex($sec); - } - return $key; - } - - /** - * Check to see if a string is valid utf8 - * - * @param string $s - * @return boolean - */ - protected function isUTF8 ( $s ) - { - return preg_match('%(?: - [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte - |\xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs - |[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte - |\xED[\x80-\x9F][\x80-\xBF] # excluding surrogates - |\xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 - |[\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 - |\xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 - )+%xs', $s); - } - - - /** - * Make a string utf8, replacing all non-utf8 chars with a '.' 
- * - * @param string - * @return string - */ - protected function makeUTF8 ( $s ) - { - if (function_exists('iconv')) - { - do - { - $ok = true; - $text = @iconv('UTF-8', 'UTF-8//TRANSLIT', $s); - if (strlen($text) != strlen($s)) - { - // Remove the offending character... - $s = $text . '.' . substr($s, strlen($text) + 1); - $ok = false; - } - } - while (!$ok); - } - return $s; - } - -} - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/store/OAuthStoreAnyMeta.php b/mod/oauth_api/vendors/oauth/library/store/OAuthStoreAnyMeta.php deleted file mode 100644 index 9c971733f..000000000 --- a/mod/oauth_api/vendors/oauth/library/store/OAuthStoreAnyMeta.php +++ /dev/null @@ -1,265 +0,0 @@ - - * @date Nov 16, 2007 4:03:30 PM - * - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -require_once dirname(__FILE__) . 
'/../../../../core/inc/any_database.inc.php'; -require_once dirname(__FILE__) . '/OAuthStoreMySQL.php'; - - -class OAuthStoreAnymeta extends OAuthStoreMySQL -{ - /** - * Construct the OAuthStoreAnymeta - * - * @param array options - */ - function __construct ( $options = array() ) - { - parent::__construct(array('conn' => any_db_conn())); - } - - - /** - * Add an entry to the log table - * - * @param array keys (osr_consumer_key, ost_token, ocr_consumer_key, oct_token) - * @param string received - * @param string sent - * @param string base_string - * @param string notes - * @param int (optional) user_id - */ - public function addLog ( $keys, $received, $sent, $base_string, $notes, $user_id = null ) - { - if (is_null($user_id) && isset($GLOBALS['any_auth'])) - { - $user_id = $GLOBALS['any_auth']->getUserId(); - } - parent::addLog($keys, $received, $sent, $base_string, $notes, $user_id); - } - - - /** - * Get a page of entries from the log. Returns the last 100 records - * matching the options given. - * - * @param array options - * @param int user_id current user - * @return array log records - */ - public function listLog ( $options, $user_id ) - { - $where = array(); - $args = array(); - if (empty($options)) - { - $where[] = 'olg_usa_id_ref = %d'; - $args[] = $user_id; - } - else - { - foreach ($options as $option => $value) - { - if (strlen($value) > 0) - { - switch ($option) - { - case 'osr_consumer_key': - case 'ocr_consumer_key': - case 'ost_token': - case 'oct_token': - $where[] = 'olg_'.$option.' 
= \'%s\''; - $args[] = $value; - break; - } - } - } - - $where[] = '(olg_usa_id_ref IS NULL OR olg_usa_id_ref = %d)'; - $args[] = $user_id; - } - - $rs = any_db_query_all_assoc(' - SELECT olg_id, - olg_osr_consumer_key AS osr_consumer_key, - olg_ost_token AS ost_token, - olg_ocr_consumer_key AS ocr_consumer_key, - olg_oct_token AS oct_token, - olg_usa_id_ref AS user_id, - olg_received AS received, - olg_sent AS sent, - olg_base_string AS base_string, - olg_notes AS notes, - olg_timestamp AS timestamp, - INET_NTOA(olg_remote_ip) AS remote_ip - FROM oauth_log - WHERE '.implode(' AND ', $where).' - ORDER BY olg_id DESC - LIMIT 0,100', $args); - - return $rs; - } - - - - /** - * Initialise the database - */ - public function install () - { - parent::install(); - - any_db_query("ALTER TABLE oauth_consumer_registry MODIFY ocr_usa_id_ref int(11) unsigned"); - any_db_query("ALTER TABLE oauth_consumer_token MODIFY oct_usa_id_ref int(11) unsigned not null"); - any_db_query("ALTER TABLE oauth_server_registry MODIFY osr_usa_id_ref int(11) unsigned"); - any_db_query("ALTER TABLE oauth_server_token MODIFY ost_usa_id_ref int(11) unsigned not null"); - any_db_query("ALTER TABLE oauth_log MODIFY olg_usa_id_ref int(11) unsigned"); - - any_db_alter_add_fk('oauth_consumer_registry', 'ocr_usa_id_ref', 'any_user_auth(usa_id_ref)', 'on update cascade on delete set null'); - any_db_alter_add_fk('oauth_consumer_token', 'oct_usa_id_ref', 'any_user_auth(usa_id_ref)', 'on update cascade on delete cascade'); - any_db_alter_add_fk('oauth_server_registry', 'osr_usa_id_ref', 'any_user_auth(usa_id_ref)', 'on update cascade on delete set null'); - any_db_alter_add_fk('oauth_server_token', 'ost_usa_id_ref', 'any_user_auth(usa_id_ref)', 'on update cascade on delete cascade'); - any_db_alter_add_fk('oauth_log', 'olg_usa_id_ref', 'any_user_auth(usa_id_ref)', 'on update cascade on delete cascade'); - } - - - - /** Some simple helper functions for querying the mysql db **/ - - /** - * Perform a query, 
ignore the results - * - * @param string sql - * @param vararg arguments (for sprintf) - */ - protected function query ( $sql ) - { - list($sql, $args) = $this->sql_args(func_get_args()); - any_db_query($sql, $args); - } - - - /** - * Perform a query, ignore the results - * - * @param string sql - * @param vararg arguments (for sprintf) - * @return array - */ - protected function query_all_assoc ( $sql ) - { - list($sql, $args) = $this->sql_args(func_get_args()); - return any_db_query_all_assoc($sql, $args); - } - - - /** - * Perform a query, return the first row - * - * @param string sql - * @param vararg arguments (for sprintf) - * @return array - */ - protected function query_row_assoc ( $sql ) - { - list($sql, $args) = $this->sql_args(func_get_args()); - return any_db_query_row_assoc($sql, $args); - } - - - /** - * Perform a query, return the first row - * - * @param string sql - * @param vararg arguments (for sprintf) - * @return array - */ - protected function query_row ( $sql ) - { - list($sql, $args) = $this->sql_args(func_get_args()); - return any_db_query_row($sql, $args); - } - - - /** - * Perform a query, return the first column of the first row - * - * @param string sql - * @param vararg arguments (for sprintf) - * @return mixed - */ - protected function query_one ( $sql ) - { - list($sql, $args) = $this->sql_args(func_get_args()); - return any_db_query_one($sql, $args); - } - - - /** - * Return the number of rows affected in the last query - * - * @return int - */ - protected function query_affected_rows () - { - return any_db_affected_rows(); - } - - - /** - * Return the id of the last inserted row - * - * @return int - */ - protected function query_insert_id () - { - return any_db_insert_id(); - } - - - private function sql_args ( $args ) - { - $sql = array_shift($args); - if (count($args) == 1 && is_array($args[0])) - { - $args = $args[0]; - } - return array($sql, $args); - } - -} - - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline 
at end of file diff --git a/mod/oauth_api/vendors/oauth/library/store/OAuthStoreMySQL.php b/mod/oauth_api/vendors/oauth/library/store/OAuthStoreMySQL.php deleted file mode 100644 index a1b04c5c8..000000000 --- a/mod/oauth_api/vendors/oauth/library/store/OAuthStoreMySQL.php +++ /dev/null @@ -1,1879 +0,0 @@ - - * @date Nov 16, 2007 4:03:30 PM - * - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - - -require_once dirname(__FILE__) . '/OAuthStoreAbstract.class.php'; - - -class OAuthStoreMySQL extends OAuthStoreAbstract -{ - /** - * The MySQL connection - */ - protected $conn; - - /** - * Maximum delta a timestamp may be off from a previous timestamp. - * Allows multiple consumers with some clock skew to work with the same token. - * Unit is seconds, default max skew is 10 minutes. 
- */ - protected $max_timestamp_skew = 600; - - /** - * Default ttl for request tokens - */ - protected $max_request_token_ttl = 3600; - - - /** - * Construct the OAuthStoreMySQL. - * In the options you have to supply either: - * - server, username, password and database (for a mysql_connect) - * - conn (for the connection to be used) - * - * @param array options - */ - function __construct ( $options = array() ) - { - if (isset($options['conn'])) - { - $this->conn = $options['conn']; - } - else - { - if (isset($options['server'])) - { - $server = $options['server']; - $username = $options['username']; - - if (isset($options['password'])) - { - $this->conn = mysql_connect($server, $username, $options['password']); - } - else - { - $this->conn = mysql_connect($server, $username); - } - } - else - { - // Try the default mysql connect - $this->conn = mysql_connect(); - } - - if ($this->conn === false) - { - throw new OAuthException('Could not connect to MySQL database: ' . mysql_error()); - } - - if (isset($options['database'])) - { - if (!mysql_select_db($options['database'], $this->conn)) - { - $this->sql_errcheck(); - } - } - $this->query('set character set utf8'); - } - } - - - /** - * Find stored credentials for the consumer key and token. Used by an OAuth server - * when verifying an OAuth request. 
- * - * @param string consumer_key - * @param string token - * @param string token_type false, 'request' or 'access' - * @exception OAuthException when no secrets where found - * @return array assoc (consumer_secret, token_secret, osr_id, ost_id, user_id) - */ - public function getSecretsForVerify ( $consumer_key, $token, $token_type = 'access' ) - { - if ($token_type === false) - { - $rs = $this->query_row_assoc(' - SELECT osr_id, - osr_consumer_key as consumer_key, - osr_consumer_secret as consumer_secret - FROM oauth_server_registry - WHERE osr_consumer_key = \'%s\' - AND osr_enabled = 1 - ', - $consumer_key); - - if ($rs) - { - $rs['token'] = false; - $rs['token_secret'] = false; - $rs['user_id'] = false; - $rs['ost_id'] = false; - } - } - else - { - $rs = $this->query_row_assoc(' - SELECT osr_id, - ost_id, - ost_usa_id_ref as user_id, - osr_consumer_key as consumer_key, - osr_consumer_secret as consumer_secret, - ost_token as token, - ost_token_secret as token_secret - FROM oauth_server_registry - JOIN oauth_server_token - ON ost_osr_id_ref = osr_id - WHERE ost_token_type = \'%s\' - AND osr_consumer_key = \'%s\' - AND ost_token = \'%s\' - AND osr_enabled = 1 - AND ost_token_ttl >= NOW() - ', - $token_type, $consumer_key, $token); - } - - if (empty($rs)) - { - throw new OAuthException('The consumer_key "'.$consumer_key.'" token "'.$token.'" combination does not exist or is not enabled.'); - } - return $rs; - } - - - /** - * Find the server details for signing a request, always looks for an access token. - * The returned credentials depend on which local user is making the request. 
- * - * The consumer_key must belong to the user or be public (user id is null) - * - * For signing we need all of the following: - * - * consumer_key consumer key associated with the server - * consumer_secret consumer secret associated with this server - * token access token associated with this server - * token_secret secret for the access token - * signature_methods signing methods supported by the server (array) - * - * @todo filter on token type (we should know how and with what to sign this request, and there might be old access tokens) - * @param string uri uri of the server - * @param int user_id id of the logged on user - * @param string name (optional) name of the token (case sensitive) - * @exception OAuthException when no credentials found - * @return array - */ - public function getSecretsForSignature ( $uri, $user_id, $name = '' ) - { - // Find a consumer key and token for the given uri - $ps = parse_url($uri); - $host = isset($ps['host']) ? $ps['host'] : 'localhost'; - $path = isset($ps['path']) ? 
$ps['path'] : ''; - - if (empty($path) || substr($path, -1) != '/') - { - $path .= '/'; - } - - // The owner of the consumer_key is either the user or nobody (public consumer key) - $secrets = $this->query_row_assoc(' - SELECT ocr_consumer_key as consumer_key, - ocr_consumer_secret as consumer_secret, - oct_token as token, - oct_token_secret as token_secret, - ocr_signature_methods as signature_methods - FROM oauth_consumer_registry - JOIN oauth_consumer_token ON oct_ocr_id_ref = ocr_id - WHERE ocr_server_uri_host = \'%s\' - AND ocr_server_uri_path = LEFT(\'%s\', LENGTH(ocr_server_uri_path)) - AND (ocr_usa_id_ref = %s OR ocr_usa_id_ref IS NULL) - AND oct_usa_id_ref = %d - AND oct_token_type = \'access\' - AND oct_name = \'%s\' - AND oct_token_ttl >= NOW() - ORDER BY ocr_usa_id_ref DESC, ocr_consumer_secret DESC, LENGTH(ocr_server_uri_path) DESC - LIMIT 0,1 - ', $host, $path, $user_id, $user_id, $name - ); - - if (empty($secrets)) - { - throw new OAuthException('No server tokens available for '.$uri); - } - $secrets['signature_methods'] = explode(',', $secrets['signature_methods']); - return $secrets; - } - - - /** - * Get the token and token secret we obtained from a server. 
- * - * @param string consumer_key - * @param string token - * @param string token_type - * @param int user_id the user owning the token - * @param string name optional name for a named token - * @exception OAuthException when no credentials found - * @return array - */ - public function getServerTokenSecrets ( $consumer_key, $token, $token_type, $user_id, $name = '' ) - { - if ($token_type != 'request' && $token_type != 'access') - { - throw new OAuthException('Unkown token type "'.$token_type.'", must be either "request" or "access"'); - } - - // Take the most recent token of the given type - $r = $this->query_row_assoc(' - SELECT ocr_consumer_key as consumer_key, - ocr_consumer_secret as consumer_secret, - oct_token as token, - oct_token_secret as token_secret, - oct_name as token_name, - ocr_signature_methods as signature_methods, - ocr_server_uri as server_uri, - ocr_request_token_uri as request_token_uri, - ocr_authorize_uri as authorize_uri, - ocr_access_token_uri as access_token_uri, - IF(oct_token_ttl >= \'9999-12-31\', NULL, UNIX_TIMESTAMP(oct_token_ttl) - UNIX_TIMESTAMP(NOW())) as token_ttl - FROM oauth_consumer_registry - JOIN oauth_consumer_token - ON oct_ocr_id_ref = ocr_id - WHERE ocr_consumer_key = \'%s\' - AND oct_token_type = \'%s\' - AND oct_token = \'%s\' - AND oct_usa_id_ref = %d - AND oct_token_ttl >= NOW() - ', $consumer_key, $token_type, $token, $user_id - ); - - if (empty($r)) - { - throw new OAuthException('Could not find a "'.$token_type.'" token for consumer "'.$consumer_key.'" and user '.$user_id); - } - if (isset($r['signature_methods']) && !empty($r['signature_methods'])) - { - $r['signature_methods'] = explode(',',$r['signature_methods']); - } - else - { - $r['signature_methods'] = array(); - } - return $r; - } - - - /** - * Add a request token we obtained from a server. 
- * - * @todo remove old tokens for this user and this ocr_id - * @param string consumer_key key of the server in the consumer registry - * @param string token_type one of 'request' or 'access' - * @param string token - * @param string token_secret - * @param int user_id the user owning the token - * @param array options extra options, name and token_ttl - * @exception OAuthException when server is not known - * @exception OAuthException when we received a duplicate token - */ - public function addServerToken ( $consumer_key, $token_type, $token, $token_secret, $user_id, $options = array() ) - { - if ($token_type != 'request' && $token_type != 'access') - { - throw new OAuthException('Unknown token type "'.$token_type.'", must be either "request" or "access"'); - } - - // Maximum time to live for this token - if (isset($options['token_ttl']) && is_numeric($options['token_ttl'])) - { - $ttl = 'DATE_ADD(NOW(), INTERVAL '.intval($options['token_ttl']).' SECOND)'; - } - else if ($token == 'request') - { - $ttl = 'DATE_ADD(NOW(), INTERVAL '.$this->max_request_token_ttl.' 
SECOND)'; - } - else - { - $ttl = "'9999-12-31'"; - } - - $ocr_id = $this->query_one(' - SELECT ocr_id - FROM oauth_consumer_registry - WHERE ocr_consumer_key = \'%s\' - ', $consumer_key); - - if (empty($ocr_id)) - { - throw new OAuthException('No server associated with consumer_key "'.$consumer_key.'"'); - } - - // Named tokens, unique per user/consumer key - if (isset($options['name']) && $options['name'] != '') - { - $name = $options['name']; - } - else - { - $name = ''; - } - - // Delete any old tokens with the same type and name for this user/server combination - $this->query(' - DELETE FROM oauth_consumer_token - WHERE oct_ocr_id_ref = %d - AND oct_usa_id_ref = %d - AND oct_token_type = LOWER(\'%s\') - AND oct_name = \'%s\' - ', - $ocr_id, - $user_id, - $token_type, - $name); - - // Insert the new token - $this->query(' - INSERT IGNORE INTO oauth_consumer_token - SET oct_ocr_id_ref = %d, - oct_usa_id_ref = %d, - oct_name = \'%s\', - oct_token = \'%s\', - oct_token_secret= \'%s\', - oct_token_type = LOWER(\'%s\'), - oct_timestamp = NOW(), - oct_token_ttl = '.$ttl.' - ', - $ocr_id, - $user_id, - $name, - $token, - $token_secret, - $token_type); - - if (!$this->query_affected_rows()) - { - throw new OAuthException('Received duplicate token "'.$token.'" for the same consumer_key "'.$consumer_key.'"'); - } - } - - - /** - * Delete a server key. This removes access to that site. 
- * - * @param string consumer_key - * @param int user_id user registering this server - * @param boolean user_is_admin - */ - public function deleteServer ( $consumer_key, $user_id, $user_is_admin = false ) - { - if ($user_is_admin) - { - $this->query(' - DELETE FROM oauth_consumer_registry - WHERE ocr_consumer_key = \'%s\' - AND (ocr_usa_id_ref = %d OR ocr_usa_id_ref IS NULL) - ', $consumer_key, $user_id); - } - else - { - $this->query(' - DELETE FROM oauth_consumer_registry - WHERE ocr_consumer_key = \'%s\' - AND ocr_usa_id_ref = %d - ', $consumer_key, $user_id); - } - } - - - /** - * Get a server from the consumer registry using the consumer key - * - * @param string consumer_key - * @param int user_id - * @param boolean user_is_admin (optional) - * @exception OAuthException when server is not found - * @return array - */ - public function getServer ( $consumer_key, $user_id, $user_is_admin = false ) - { - $r = $this->query_row_assoc(' - SELECT ocr_id as id, - ocr_usa_id_ref as user_id, - ocr_consumer_key as consumer_key, - ocr_consumer_secret as consumer_secret, - ocr_signature_methods as signature_methods, - ocr_server_uri as server_uri, - ocr_request_token_uri as request_token_uri, - ocr_authorize_uri as authorize_uri, - ocr_access_token_uri as access_token_uri - FROM oauth_consumer_registry - WHERE ocr_consumer_key = \'%s\' - AND (ocr_usa_id_ref = %d OR ocr_usa_id_ref IS NULL) - ', $consumer_key, $user_id); - - if (empty($r)) - { - throw new OAuthException('No server with consumer_key "'.$consumer_key.'" has been registered (for this user)'); - } - - if (isset($r['signature_methods']) && !empty($r['signature_methods'])) - { - $r['signature_methods'] = explode(',',$r['signature_methods']); - } - else - { - $r['signature_methods'] = array(); - } - return $r; - } - - - - /** - * Find the server details that might be used for a request - * - * The consumer_key must belong to the user or be public (user id is null) - * - * @param string uri uri of the server - * 
@param int user_id id of the logged on user - * @exception OAuthException when no credentials found - * @return array - */ - public function getServerForUri ( $uri, $user_id ) - { - // Find a consumer key and token for the given uri - $ps = parse_url($uri); - $host = isset($ps['host']) ? $ps['host'] : 'localhost'; - $path = isset($ps['path']) ? $ps['path'] : ''; - - if (empty($path) || substr($path, -1) != '/') - { - $path .= '/'; - } - - // The owner of the consumer_key is either the user or nobody (public consumer key) - $server = $this->query_row_assoc(' - SELECT ocr_id as id, - ocr_usa_id_ref as user_id, - ocr_consumer_key as consumer_key, - ocr_consumer_secret as consumer_secret, - ocr_signature_methods as signature_methods, - ocr_server_uri as server_uri, - ocr_request_token_uri as request_token_uri, - ocr_authorize_uri as authorize_uri, - ocr_access_token_uri as access_token_uri - FROM oauth_consumer_registry - WHERE ocr_server_uri_host = \'%s\' - AND ocr_server_uri_path = LEFT(\'%s\', LENGTH(ocr_server_uri_path)) - AND (ocr_usa_id_ref = %s OR ocr_usa_id_ref IS NULL) - ORDER BY ocr_usa_id_ref DESC, consumer_secret DESC, LENGTH(ocr_server_uri_path) DESC - LIMIT 0,1 - ', $host, $path, $user_id - ); - - if (empty($server)) - { - throw new OAuthException('No server available for '.$uri); - } - $server['signature_methods'] = explode(',', $server['signature_methods']); - return $server; - } - - - /** - * Get a list of all server token this user has access to. 
- * - * @param int usr_id - * @return array - */ - public function listServerTokens ( $user_id ) - { - $ts = $this->query_all_assoc(' - SELECT ocr_consumer_key as consumer_key, - ocr_consumer_secret as consumer_secret, - oct_id as token_id, - oct_token as token, - oct_token_secret as token_secret, - oct_usa_id_ref as user_id, - ocr_signature_methods as signature_methods, - ocr_server_uri as server_uri, - ocr_server_uri_host as server_uri_host, - ocr_server_uri_path as server_uri_path, - ocr_request_token_uri as request_token_uri, - ocr_authorize_uri as authorize_uri, - ocr_access_token_uri as access_token_uri, - oct_timestamp as timestamp - FROM oauth_consumer_registry - JOIN oauth_consumer_token - ON oct_ocr_id_ref = ocr_id - WHERE oct_usa_id_ref = %d - AND oct_token_type = \'access\' - AND oct_token_ttl >= NOW() - ORDER BY ocr_server_uri_host, ocr_server_uri_path - ', $user_id); - return $ts; - } - - - /** - * Count how many tokens we have for the given server - * - * @param string consumer_key - * @return int - */ - public function countServerTokens ( $consumer_key ) - { - $count = $this->query_one(' - SELECT COUNT(oct_id) - FROM oauth_consumer_token - JOIN oauth_consumer_registry - ON oct_ocr_id_ref = ocr_id - WHERE oct_token_type = \'access\' - AND ocr_consumer_key = \'%s\' - AND oct_token_ttl >= NOW() - ', $consumer_key); - - return $count; - } - - - /** - * Get a specific server token for the given user - * - * @param string consumer_key - * @param string token - * @param int user_id - * @exception OAuthException when no such token found - * @return array - */ - public function getServerToken ( $consumer_key, $token, $user_id ) - { - $ts = $this->query_row_assoc(' - SELECT ocr_consumer_key as consumer_key, - ocr_consumer_secret as consumer_secret, - oct_token as token, - oct_token_secret as token_secret, - oct_usa_id_ref as usr_id, - ocr_signature_methods as signature_methods, - ocr_server_uri as server_uri, - ocr_server_uri_host as server_uri_host, - 
ocr_server_uri_path as server_uri_path, - ocr_request_token_uri as request_token_uri, - ocr_authorize_uri as authorize_uri, - ocr_access_token_uri as access_token_uri, - oct_timestamp as timestamp - FROM oauth_consumer_registry - JOIN oauth_consumer_token - ON oct_ocr_id_ref = ocr_id - WHERE ocr_consumer_key = \'%s\' - AND oct_usa_id_ref = %d - AND oct_token_type = \'access\' - AND oct_token = \'%s\' - AND oct_token_ttl >= NOW() - ', $consumer_key, $user_id, $token); - - if (empty($ts)) - { - throw new OAuthException('No such consumer key ('.$consumer_key.') and token ('.$token.') combination for user "'.$user_id.'"'); - } - return $ts; - } - - - /** - * Delete a token we obtained from a server. - * - * @param string consumer_key - * @param string token - * @param int user_id - * @param boolean user_is_admin - */ - public function deleteServerToken ( $consumer_key, $token, $user_id, $user_is_admin = false ) - { - if ($user_is_admin) - { - $this->query(' - DELETE oauth_consumer_token - FROM oauth_consumer_token - JOIN oauth_consumer_registry - ON oct_ocr_id_ref = ocr_id - WHERE ocr_consumer_key = \'%s\' - AND oct_token = \'%s\' - ', $consumer_key, $token); - } - else - { - $this->query(' - DELETE oauth_consumer_token - FROM oauth_consumer_token - JOIN oauth_consumer_registry - ON oct_ocr_id_ref = ocr_id - WHERE ocr_consumer_key = \'%s\' - AND oct_token = \'%s\' - AND oct_usa_id_ref = %d - ', $consumer_key, $token, $user_id); - } - } - - - /** - * Set the ttl of a server access token. This is done when the - * server receives a valid request with a xoauth_token_ttl parameter in it. 
- * - * @param string consumer_key - * @param string token - * @param int token_ttl - */ - public function setServerTokenTtl ( $consumer_key, $token, $token_ttl ) - { - if ($token_ttl <= 0) - { - // Immediate delete when the token is past its ttl - $this->deleteServerToken($consumer_key, $token, 0, true); - } - else - { - // Set maximum time to live for this token - $this->query(' - UPDATE oauth_consumer_token, oauth_consumer_registry - SET ost_token_ttl = DATE_ADD(NOW(), INTERVAL %d SECOND) - WHERE ocr_consumer_key = \'%s\' - AND oct_ocr_id_ref = ocr_id - AND oct_token = \'%s\' - ', $token_ttl, $consumer_key, $token); - } - } - - - /** - * Get a list of all consumers from the consumer registry. - * The consumer keys belong to the user or are public (user id is null) - * - * @param string q query term - * @param int user_id - * @return array - */ - public function listServers ( $q = '', $user_id ) - { - $q = trim(str_replace('%', '', $q)); - $args = array(); - - if (!empty($q)) - { - $where = ' WHERE ( ocr_consumer_key like \'%%%s%%\' - OR ocr_server_uri like \'%%%s%%\' - OR ocr_server_uri_host like \'%%%s%%\' - OR ocr_server_uri_path like \'%%%s%%\') - AND (ocr_usa_id_ref = %d OR ocr_usa_id_ref IS NULL) - '; - - $args[] = $q; - $args[] = $q; - $args[] = $q; - $args[] = $q; - $args[] = $user_id; - } - else - { - $where = ' WHERE ocr_usa_id_ref = %d OR ocr_usa_id_ref IS NULL'; - $args[] = $user_id; - } - - $servers = $this->query_all_assoc(' - SELECT ocr_id as id, - ocr_usa_id_ref as user_id, - ocr_consumer_key as consumer_key, - ocr_consumer_secret as consumer_secret, - ocr_signature_methods as signature_methods, - ocr_server_uri as server_uri, - ocr_server_uri_host as server_uri_host, - ocr_server_uri_path as server_uri_path, - ocr_request_token_uri as request_token_uri, - ocr_authorize_uri as authorize_uri, - ocr_access_token_uri as access_token_uri - FROM oauth_consumer_registry - '.$where.' 
- ORDER BY ocr_server_uri_host, ocr_server_uri_path - ', $args); - return $servers; - } - - - /** - * Register or update a server for our site (we will be the consumer) - * - * (This is the registry at the consumers, registering servers ;-) ) - * - * @param array server - * @param int user_id user registering this server - * @param boolean user_is_admin - * @exception OAuthException when fields are missing or on duplicate consumer_key - * @return consumer_key - */ - public function updateServer ( $server, $user_id, $user_is_admin = false ) - { - foreach (array('consumer_key', 'server_uri') as $f) - { - if (empty($server[$f])) - { - throw new OAuthException('The field "'.$f.'" must be set and non empty'); - } - } - - if (!empty($server['id'])) - { - $exists = $this->query_one(' - SELECT ocr_id - FROM oauth_consumer_registry - WHERE ocr_consumer_key = \'%s\' - AND ocr_id <> %d - AND (ocr_usa_id_ref = %d OR ocr_usa_id_ref IS NULL) - ', $server['consumer_key'], $server['id'], $user_id); - } - else - { - $exists = $this->query_one(' - SELECT ocr_id - FROM oauth_consumer_registry - WHERE ocr_consumer_key = \'%s\' - AND (ocr_usa_id_ref = %d OR ocr_usa_id_ref IS NULL) - ', $server['consumer_key'], $user_id); - } - - if ($exists) - { - throw new OAuthException('The server with key "'.$server['consumer_key'].'" has already been registered'); - } - - $parts = parse_url($server['server_uri']); - $host = (isset($parts['host']) ? $parts['host'] : 'localhost'); - $path = (isset($parts['path']) ? 
$parts['path'] : '/'); - - if (isset($server['signature_methods'])) - { - if (is_array($server['signature_methods'])) - { - $server['signature_methods'] = strtoupper(implode(',', $server['signature_methods'])); - } - } - else - { - $server['signature_methods'] = ''; - } - - // When the user is an admin, then the user can update the user_id of this record - if ($user_is_admin && array_key_exists('user_id', $server)) - { - if (is_null($server['user_id'])) - { - $update_user = ', ocr_usa_id_ref = NULL'; - } - else - { - $update_user = ', ocr_usa_id_ref = '.intval($server['user_id']); - } - } - else - { - $update_user = ''; - } - - if (!empty($server['id'])) - { - // Check if the current user can update this server definition - if (!$user_is_admin) - { - $ocr_usa_id_ref = $this->query_one(' - SELECT ocr_usa_id_ref - FROM oauth_consumer_registry - WHERE ocr_id = %d - ', $server['id']); - - if ($ocr_usa_id_ref != $user_id) - { - throw new OAuthException('The user "'.$user_id.'" is not allowed to update this server'); - } - } - - // Update the consumer registration - $this->query(' - UPDATE oauth_consumer_registry - SET ocr_consumer_key = \'%s\', - ocr_consumer_secret = \'%s\', - ocr_server_uri = \'%s\', - ocr_server_uri_host = \'%s\', - ocr_server_uri_path = \'%s\', - ocr_timestamp = NOW(), - ocr_request_token_uri = \'%s\', - ocr_authorize_uri = \'%s\', - ocr_access_token_uri = \'%s\', - ocr_signature_methods = \'%s\' - '.$update_user.' - WHERE ocr_id = %d - ', - $server['consumer_key'], - $server['consumer_secret'], - $server['server_uri'], - strtolower($host), - $path, - isset($server['request_token_uri']) ? $server['request_token_uri'] : '', - isset($server['authorize_uri']) ? $server['authorize_uri'] : '', - isset($server['access_token_uri']) ? 
$server['access_token_uri'] : '', - $server['signature_methods'], - $server['id'] - ); - } - else - { - if (empty($update_user)) - { - // Per default the user owning the key is the user registering the key - $update_user = ', ocr_usa_id_ref = '.intval($user_id); - } - - $this->query(' - INSERT INTO oauth_consumer_registry - SET ocr_consumer_key = \'%s\', - ocr_consumer_secret = \'%s\', - ocr_server_uri = \'%s\', - ocr_server_uri_host = \'%s\', - ocr_server_uri_path = \'%s\', - ocr_timestamp = NOW(), - ocr_request_token_uri = \'%s\', - ocr_authorize_uri = \'%s\', - ocr_access_token_uri = \'%s\', - ocr_signature_methods = \'%s\' - '.$update_user, - $server['consumer_key'], - $server['consumer_secret'], - $server['server_uri'], - strtolower($host), - $path, - isset($server['request_token_uri']) ? $server['request_token_uri'] : '', - isset($server['authorize_uri']) ? $server['authorize_uri'] : '', - isset($server['access_token_uri']) ? $server['access_token_uri'] : '', - $server['signature_methods'] - ); - - $ocr_id = $this->query_insert_id(); - } - return $server['consumer_key']; - } - - - /** - * Insert/update a new consumer with this server (we will be the server) - * When this is a new consumer, then also generate the consumer key and secret. - * Never updates the consumer key and secret. - * When the id is set, then the key and secret must correspond to the entry - * being updated. 
- * - * (This is the registry at the server, registering consumers ;-) ) - * - * @param array consumer - * @param int user_id user registering this consumer - * @param boolean user_is_admin - * @return string consumer key - */ - public function updateConsumer ( $consumer, $user_id, $user_is_admin = false ) - { - if (!$user_is_admin) - { - foreach (array('requester_name', 'requester_email') as $f) - { - if (empty($consumer[$f])) - { - throw new OAuthException('The field "'.$f.'" must be set and non empty'); - } - } - } - - if (!empty($consumer['id'])) - { - if (empty($consumer['consumer_key'])) - { - throw new OAuthException('The field "consumer_key" must be set and non empty'); - } - if (!$user_is_admin && empty($consumer['consumer_secret'])) - { - throw new OAuthException('The field "consumer_secret" must be set and non empty'); - } - - // Check if the current user can update this server definition - if (!$user_is_admin) - { - $osr_usa_id_ref = $this->query_one(' - SELECT osr_usa_id_ref - FROM oauth_server_registry - WHERE osr_id = %d - ', $consumer['id']); - - if ($osr_usa_id_ref != $user_id) - { - throw new OAuthException('The user "'.$user_id.'" is not allowed to update this consumer'); - } - } - else - { - // User is an admin, allow a key owner to be changed or key to be shared - if (array_key_exists('user_id',$consumer)) - { - if (is_null($consumer['user_id'])) - { - $this->query(' - UPDATE oauth_server_registry - SET osr_usa_id_ref = NULL - WHERE osr_id = %d - ', $consumer['id']); - } - else - { - $this->query(' - UPDATE oauth_server_registry - SET osr_usa_id_ref = %d - WHERE osr_id = %d - ', $consumer['user_id'], $consumer['id']); - } - } - } - - $this->query(' - UPDATE oauth_server_registry - SET osr_requester_name = \'%s\', - osr_requester_email = \'%s\', - osr_callback_uri = \'%s\', - osr_application_uri = \'%s\', - osr_application_title = \'%s\', - osr_application_descr = \'%s\', - osr_application_notes = \'%s\', - osr_application_type = \'%s\', - 
osr_application_commercial = IF(%d,1,0), - osr_timestamp = NOW() - WHERE osr_id = %d - AND osr_consumer_key = \'%s\' - AND osr_consumer_secret = \'%s\' - ', - $consumer['requester_name'], - $consumer['requester_email'], - isset($consumer['callback_uri']) ? $consumer['callback_uri'] : '', - isset($consumer['application_uri']) ? $consumer['application_uri'] : '', - isset($consumer['application_title']) ? $consumer['application_title'] : '', - isset($consumer['application_descr']) ? $consumer['application_descr'] : '', - isset($consumer['application_notes']) ? $consumer['application_notes'] : '', - isset($consumer['application_type']) ? $consumer['application_type'] : '', - isset($consumer['application_commercial']) ? $consumer['application_commercial'] : 0, - $consumer['id'], - $consumer['consumer_key'], - $consumer['consumer_secret'] - ); - - - $consumer_key = $consumer['consumer_key']; - } - else - { - $consumer_key = $this->generateKey(true); - $consumer_secret= $this->generateKey(); - - // When the user is an admin, then the user can be forced to something else that the user - if ($user_is_admin && array_key_exists('user_id',$consumer)) - { - if (is_null($consumer['user_id'])) - { - $owner_id = 'NULL'; - } - else - { - $owner_id = intval($consumer['user_id']); - } - } - else - { - // No admin, take the user id as the owner id. 
- $owner_id = intval($user_id); - } - - $this->query(' - INSERT INTO oauth_server_registry - SET osr_enabled = 1, - osr_status = \'active\', - osr_usa_id_ref = %s, - osr_consumer_key = \'%s\', - osr_consumer_secret = \'%s\', - osr_requester_name = \'%s\', - osr_requester_email = \'%s\', - osr_callback_uri = \'%s\', - osr_application_uri = \'%s\', - osr_application_title = \'%s\', - osr_application_descr = \'%s\', - osr_application_notes = \'%s\', - osr_application_type = \'%s\', - osr_application_commercial = IF(%d,1,0), - osr_timestamp = NOW(), - osr_issue_date = NOW() - ', - $owner_id, - $consumer_key, - $consumer_secret, - $consumer['requester_name'], - $consumer['requester_email'], - isset($consumer['callback_uri']) ? $consumer['callback_uri'] : '', - isset($consumer['application_uri']) ? $consumer['application_uri'] : '', - isset($consumer['application_title']) ? $consumer['application_title'] : '', - isset($consumer['application_descr']) ? $consumer['application_descr'] : '', - isset($consumer['application_notes']) ? $consumer['application_notes'] : '', - isset($consumer['application_type']) ? $consumer['application_type'] : '', - isset($consumer['application_commercial']) ? $consumer['application_commercial'] : 0 - ); - } - return $consumer_key; - - } - - - - /** - * Delete a consumer key. This removes access to our site for all applications using this key. 
- * - * @param string consumer_key - * @param int user_id user registering this server - * @param boolean user_is_admin - */ - public function deleteConsumer ( $consumer_key, $user_id, $user_is_admin = false ) - { - if ($user_is_admin) - { - $this->query(' - DELETE FROM oauth_server_registry - WHERE osr_consumer_key = \'%s\' - AND (osr_usa_id_ref = %d OR osr_usa_id_ref IS NULL) - ', $consumer_key, $user_id); - } - else - { - $this->query(' - DELETE FROM oauth_server_registry - WHERE osr_consumer_key = \'%s\' - AND osr_usa_id_ref = %d - ', $consumer_key, $user_id); - } - } - - - - /** - * Fetch a consumer of this server, by consumer_key. - * - * @param string consumer_key - * @param int user_id - * @param boolean user_is_admin (optional) - * @exception OAuthException when consumer not found - * @return array - */ - public function getConsumer ( $consumer_key, $user_id, $user_is_admin = false ) - { - $consumer = $this->query_row_assoc(' - SELECT * - FROM oauth_server_registry - WHERE osr_consumer_key = \'%s\' - ', $consumer_key); - - if (!is_array($consumer)) - { - throw new OAuthException('No consumer with consumer_key "'.$consumer_key.'"'); - } - - $c = array(); - foreach ($consumer as $key => $value) - { - $c[substr($key, 4)] = $value; - } - $c['user_id'] = $c['usa_id_ref']; - - if (!$user_is_admin && !empty($c['user_id']) && $c['user_id'] != $user_id) - { - throw new OAuthException('No access to the consumer information for consumer_key "'.$consumer_key.'"'); - } - return $c; - } - - - /** - * Fetch the static consumer key for this provider. The user for the static consumer - * key is NULL (no user, shared key). If the key did not exist then the key is created. 
- * - * @return string - */ - public function getConsumerStatic () - { - $consumer = $this->query_one(' - SELECT osr_consumer_key - FROM oauth_server_registry - WHERE osr_consumer_key LIKE \'sc-%%\' - AND osr_usa_id_ref IS NULL - '); - - if (empty($consumer)) - { - $consumer_key = 'sc-'.$this->generateKey(true); - $this->query(' - INSERT INTO oauth_server_registry - SET osr_enabled = 1, - osr_status = \'active\', - osr_usa_id_ref = NULL, - osr_consumer_key = \'%s\', - osr_consumer_secret = \'\', - osr_requester_name = \'\', - osr_requester_email = \'\', - osr_callback_uri = \'\', - osr_application_uri = \'\', - osr_application_title = \'Static shared consumer key\', - osr_application_descr = \'\', - osr_application_notes = \'Static shared consumer key\', - osr_application_type = \'\', - osr_application_commercial = 0, - osr_timestamp = NOW(), - osr_issue_date = NOW() - ', - $consumer_key - ); - - // Just make sure that if the consumer key is truncated that we get the truncated string - $consumer = $this->getConsumerStatic(); - } - return $consumer; - } - - - /** - * Add an unautorized request token to our server. - * - * @param string consumer_key - * @param array options (eg. 
token_ttl) - * @return array (token, token_secret) - */ - public function addConsumerRequestToken ( $consumer_key, $options = array() ) - { - $token = $this->generateKey(true); - $secret = $this->generateKey(); - $osr_id = $this->query_one(' - SELECT osr_id - FROM oauth_server_registry - WHERE osr_consumer_key = \'%s\' - AND osr_enabled = 1 - ', $consumer_key); - - if (!$osr_id) - { - throw new OAuthException('No server with consumer_key "'.$consumer_key.'" or consumer_key is disabled'); - } - - if (isset($options['token_ttl']) && is_numeric($options['token_ttl'])) - { - $ttl = intval($options['token_ttl']); - } - else - { - $ttl = $this->max_request_token_ttl; - } - - $this->query(' - INSERT INTO oauth_server_token - SET ost_osr_id_ref = %d, - ost_usa_id_ref = 1, - ost_token = \'%s\', - ost_token_secret = \'%s\', - ost_token_type = \'request\', - ost_token_ttl = DATE_ADD(NOW(), INTERVAL %d SECOND) - ON DUPLICATE KEY UPDATE - ost_osr_id_ref = VALUES(ost_osr_id_ref), - ost_usa_id_ref = VALUES(ost_usa_id_ref), - ost_token = VALUES(ost_token), - ost_token_secret = VALUES(ost_token_secret), - ost_token_type = VALUES(ost_token_type), - ost_token_ttl = VALUES(ost_token_ttl), - ost_timestamp = NOW() - ', $osr_id, $token, $secret, $ttl); - - return array('token'=>$token, 'token_secret'=>$secret, 'token_ttl'=>$ttl); - } - - - /** - * Fetch the consumer request token, by request token. - * - * @param string token - * @return array token and consumer details - */ - public function getConsumerRequestToken ( $token ) - { - $rs = $this->query_row_assoc(' - SELECT ost_token as token, - ost_token_secret as token_secret, - osr_consumer_key as consumer_key, - osr_consumer_secret as consumer_secret, - ost_token_type as token_type - FROM oauth_server_token - JOIN oauth_server_registry - ON ost_osr_id_ref = osr_id - WHERE ost_token_type = \'request\' - AND ost_token = \'%s\' - AND ost_token_ttl >= NOW() - ', $token); - - return $rs; - } - - - /** - * Delete a consumer token. 
The token must be a request or authorized token. - * - * @param string token - */ - public function deleteConsumerRequestToken ( $token ) - { - $this->query(' - DELETE FROM oauth_server_token - WHERE ost_token = \'%s\' - AND ost_token_type = \'request\' - ', $token); - } - - - /** - * Upgrade a request token to be an authorized request token. - * - * @param string token - * @param int user_id user authorizing the token - * @param string referrer_host used to set the referrer host for this token, for user feedback - */ - public function authorizeConsumerRequestToken ( $token, $user_id, $referrer_host = '' ) - { - $this->query(' - UPDATE oauth_server_token - SET ost_authorized = 1, - ost_usa_id_ref = %d, - ost_timestamp = NOW(), - ost_referrer_host = \'%s\' - WHERE ost_token = \'%s\' - AND ost_token_type = \'request\' - ', $user_id, $referrer_host, $token); - } - - - /** - * Count the consumer access tokens for the given consumer. - * - * @param string consumer_key - * @return int - */ - public function countConsumerAccessTokens ( $consumer_key ) - { - $count = $this->query_one(' - SELECT COUNT(ost_id) - FROM oauth_server_token - JOIN oauth_server_registry - ON ost_osr_id_ref = osr_id - WHERE ost_token_type = \'access\' - AND osr_consumer_key = \'%s\' - AND ost_token_ttl >= NOW() - ', $consumer_key); - - return $count; - } - - - /** - * Exchange an authorized request token for new access token. - * - * @param string token - * @param array options options for the token, token_ttl - * @exception OAuthException when token could not be exchanged - * @return array (token, token_secret) - */ - public function exchangeConsumerRequestForAccessToken ( $token, $options = array() ) - { - $new_token = $this->generateKey(true); - $new_secret = $this->generateKey(); - - // Maximum time to live for this token - if (isset($options['token_ttl']) && is_numeric($options['token_ttl'])) - { - $ttl_sql = 'DATE_ADD(NOW(), INTERVAL '.intval($options['token_ttl']).' 
SECOND)'; - } - else - { - $ttl_sql = "'9999-12-31'"; - } - - $this->query(' - UPDATE oauth_server_token - SET ost_token = \'%s\', - ost_token_secret = \'%s\', - ost_token_type = \'access\', - ost_timestamp = NOW(), - ost_token_ttl = '.$ttl_sql.' - WHERE ost_token = \'%s\' - AND ost_token_type = \'request\' - AND ost_authorized = 1 - AND ost_token_ttl >= NOW() - ', $new_token, $new_secret, $token); - - if ($this->query_affected_rows() != 1) - { - throw new OAuthException('Can\'t exchange request token "'.$token.'" for access token. No such token or not authorized'); - } - - $ret = array('token' => $new_token, 'token_secret' => $new_secret); - $ttl = $this->query_one(' - SELECT IF(ost_token_ttl >= \'9999-12-31\', NULL, UNIX_TIMESTAMP(ost_token_ttl) - UNIX_TIMESTAMP(NOW())) as token_ttl - FROM oauth_server_token - WHERE ost_token = \'%s\'', $new_token); - - if (is_numeric($ttl)) - { - $ret['token_ttl'] = intval($ttl); - } - return $ret; - } - - - /** - * Fetch the consumer access token, by access token. - * - * @param string token - * @param int user_id - * @exception OAuthException when token is not found - * @return array token and consumer details - */ - public function getConsumerAccessToken ( $token, $user_id ) - { - $rs = $this->query_row_assoc(' - SELECT ost_token as token, - ost_token_secret as token_secret, - ost_referrer_host as token_referrer_host, - osr_consumer_key as consumer_key, - osr_consumer_secret as consumer_secret, - osr_application_uri as application_uri, - osr_application_title as application_title, - osr_application_descr as application_descr - FROM oauth_server_token - JOIN oauth_server_registry - ON ost_osr_id_ref = osr_id - WHERE ost_token_type = \'access\' - AND ost_token = \'%s\' - AND ost_usa_id_ref = %d - AND ost_token_ttl >= NOW() - ', $token, $user_id); - - if (empty($rs)) - { - throw new OAuthException('No server_token "'.$token.'" for user "'.$user_id.'"'); - } - return $rs; - } - - - /** - * Delete a consumer access token. 
- * - * @param string token - * @param int user_id - * @param boolean user_is_admin - */ - public function deleteConsumerAccessToken ( $token, $user_id, $user_is_admin = false ) - { - if ($user_is_admin) - { - $this->query(' - DELETE FROM oauth_server_token - WHERE ost_token = \'%s\' - AND ost_token_type = \'access\' - ', $token); - } - else - { - $this->query(' - DELETE FROM oauth_server_token - WHERE ost_token = \'%s\' - AND ost_token_type = \'access\' - AND ost_usa_id_ref = %d - ', $token, $user_id); - } - } - - - /** - * Set the ttl of a consumer access token. This is done when the - * server receives a valid request with a xoauth_token_ttl parameter in it. - * - * @param string token - * @param int ttl - */ - public function setConsumerAccessTokenTtl ( $token, $token_ttl ) - { - if ($token_ttl <= 0) - { - // Immediate delete when the token is past its ttl - $this->deleteConsumerAccessToken($token, 0, true); - } - else - { - // Set maximum time to live for this token - $this->query(' - UPDATE oauth_server_token - SET ost_token_ttl = DATE_ADD(NOW(), INTERVAL %d SECOND) - WHERE ost_token = \'%s\' - AND ost_token_type = \'access\' - ', $token_ttl, $token); - } - } - - - /** - * Fetch a list of all consumer keys, secrets etc. 
- * Returns the public (user_id is null) and the keys owned by the user - * - * @param int user_id - * @return array - */ - public function listConsumers ( $user_id ) - { - $rs = $this->query_all_assoc(' - SELECT osr_id as id, - osr_usa_id_ref as user_id, - osr_consumer_key as consumer_key, - osr_consumer_secret as consumer_secret, - osr_enabled as enabled, - osr_status as status, - osr_issue_date as issue_date, - osr_application_uri as application_uri, - osr_application_title as application_title, - osr_application_descr as application_descr, - osr_requester_name as requester_name, - osr_requester_email as requester_email - FROM oauth_server_registry - WHERE (osr_usa_id_ref = %d OR osr_usa_id_ref IS NULL) - ORDER BY osr_application_title - ', $user_id); - return $rs; - } - - - /** - * Fetch a list of all consumer tokens accessing the account of the given user. - * - * @param int user_id - * @return array - */ - public function listConsumerTokens ( $user_id ) - { - $rs = $this->query_all_assoc(' - SELECT osr_consumer_key as consumer_key, - osr_consumer_secret as consumer_secret, - osr_enabled as enabled, - osr_status as status, - osr_application_uri as application_uri, - osr_application_title as application_title, - osr_application_descr as application_descr, - ost_timestamp as timestamp, - ost_token as token, - ost_token_secret as token_secret, - ost_referrer_host as token_referrer_host - FROM oauth_server_registry - JOIN oauth_server_token - ON ost_osr_id_ref = osr_id - WHERE ost_usa_id_ref = %d - AND ost_token_type = \'access\' - AND ost_token_ttl >= NOW() - ORDER BY osr_application_title - ', $user_id); - return $rs; - } - - - /** - * Check an nonce/timestamp combination. Clears any nonce combinations - * that are older than the one received. 
- * - * @param string consumer_key - * @param string token - * @param int timestamp - * @param string nonce - * @exception OAuthException thrown when the timestamp is not in sequence or nonce is not unique - */ - public function checkServerNonce ( $consumer_key, $token, $timestamp, $nonce ) - { - $r = $this->query_row(' - SELECT MAX(osn_timestamp), MAX(osn_timestamp) > %d + %d - FROM oauth_server_nonce - WHERE osn_consumer_key = \'%s\' - AND osn_token = \'%s\' - ', $timestamp, $this->max_timestamp_skew, $consumer_key, $token); - - if (!empty($r) && $r[1]) - { - throw new OAuthException('Timestamp is out of sequence. Request rejected. Got '.$timestamp.' last max is '.$r[0].' allowed skew is '.$this->max_timestamp_skew); - } - - // Insert the new combination - $this->query(' - INSERT IGNORE INTO oauth_server_nonce - SET osn_consumer_key = \'%s\', - osn_token = \'%s\', - osn_timestamp = %d, - osn_nonce = \'%s\' - ', $consumer_key, $token, $timestamp, $nonce); - - if ($this->query_affected_rows() == 0) - { - throw new OAuthException('Duplicate timestamp/nonce combination, possible replay attack. 
Request rejected.'); - } - - // Clean up all timestamps older than the one we just received - $this->query(' - DELETE FROM oauth_server_nonce - WHERE osn_consumer_key = \'%s\' - AND osn_token = \'%s\' - AND osn_timestamp < %d - %d - ', $consumer_key, $token, $timestamp, $this->max_timestamp_skew); - } - - - /** - * Add an entry to the log table - * - * @param array keys (osr_consumer_key, ost_token, ocr_consumer_key, oct_token) - * @param string received - * @param string sent - * @param string base_string - * @param string notes - * @param int (optional) user_id - */ - public function addLog ( $keys, $received, $sent, $base_string, $notes, $user_id = null ) - { - $args = array(); - $ps = array(); - foreach ($keys as $key => $value) - { - $args[] = $value; - $ps[] = "olg_$key = '%s'"; - } - - if (!empty($_SERVER['REMOTE_ADDR'])) - { - $remote_ip = $_SERVER['REMOTE_ADDR']; - } - else if (!empty($_SERVER['REMOTE_IP'])) - { - $remote_ip = $_SERVER['REMOTE_IP']; - } - else - { - $remote_ip = '0.0.0.0'; - } - - // Build the SQL - $ps[] = "olg_received = '%s'"; $args[] = $this->makeUTF8($received); - $ps[] = "olg_sent = '%s'"; $args[] = $this->makeUTF8($sent); - $ps[] = "olg_base_string= '%s'"; $args[] = $base_string; - $ps[] = "olg_notes = '%s'"; $args[] = $this->makeUTF8($notes); - $ps[] = "olg_usa_id_ref = NULLIF(%d,0)"; $args[] = $user_id; - $ps[] = "olg_remote_ip = IFNULL(INET_ATON('%s'),0)"; $args[] = $remote_ip; - - $this->query('INSERT INTO oauth_log SET '.implode(',', $ps), $args); - } - - - /** - * Get a page of entries from the log. Returns the last 100 records - * matching the options given. 
- * - * @param array options - * @param int user_id current user - * @return array log records - */ - public function listLog ( $options, $user_id ) - { - $where = array(); - $args = array(); - if (empty($options)) - { - $where[] = 'olg_usa_id_ref = %d'; - $args[] = $user_id; - } - else - { - foreach ($options as $option => $value) - { - if (strlen($value) > 0) - { - switch ($option) - { - case 'osr_consumer_key': - case 'ocr_consumer_key': - case 'ost_token': - case 'oct_token': - $where[] = 'olg_'.$option.' = \'%s\''; - $args[] = $value; - break; - } - } - } - - $where[] = '(olg_usa_id_ref IS NULL OR olg_usa_id_ref = %d)'; - $args[] = $user_id; - } - - $rs = $this->query_all_assoc(' - SELECT olg_id, - olg_osr_consumer_key AS osr_consumer_key, - olg_ost_token AS ost_token, - olg_ocr_consumer_key AS ocr_consumer_key, - olg_oct_token AS oct_token, - olg_usa_id_ref AS user_id, - olg_received AS received, - olg_sent AS sent, - olg_base_string AS base_string, - olg_notes AS notes, - olg_timestamp AS timestamp, - INET_NTOA(olg_remote_ip) AS remote_ip - FROM oauth_log - WHERE '.implode(' AND ', $where).' - ORDER BY olg_id DESC - LIMIT 0,100', $args); - - return $rs; - } - - - - /** - * Initialise the database - */ - public function install () - { - require_once dirname(__FILE__) . 
'/mysql/install.php'; - } - - - /* ** Some simple helper functions for querying the mysql db ** */ - - /** - * Perform a query, ignore the results - * - * @param string sql - * @param vararg arguments (for sprintf) - */ - protected function query ( $sql ) - { - $sql = $this->sql_printf(func_get_args()); - if (!($res = mysql_query($sql, $this->conn))) - { - $this->sql_errcheck($sql); - } - if (is_resource($res)) - { - mysql_free_result($res); - } - } - - - /** - * Perform a query, ignore the results - * - * @param string sql - * @param vararg arguments (for sprintf) - * @return array - */ - protected function query_all_assoc ( $sql ) - { - $sql = $this->sql_printf(func_get_args()); - if (!($res = mysql_query($sql, $this->conn))) - { - $this->sql_errcheck($sql); - } - $rs = array(); - while ($row = mysql_fetch_assoc($res)) - { - $rs[] = $row; - } - mysql_free_result($res); - return $rs; - } - - - /** - * Perform a query, return the first row - * - * @param string sql - * @param vararg arguments (for sprintf) - * @return array - */ - protected function query_row_assoc ( $sql ) - { - $sql = $this->sql_printf(func_get_args()); - if (!($res = mysql_query($sql, $this->conn))) - { - $this->sql_errcheck($sql); - } - if ($row = mysql_fetch_assoc($res)) - { - $rs = $row; - } - else - { - $rs = false; - } - mysql_free_result($res); - return $rs; - } - - - /** - * Perform a query, return the first row - * - * @param string sql - * @param vararg arguments (for sprintf) - * @return array - */ - protected function query_row ( $sql ) - { - $sql = $this->sql_printf(func_get_args()); - if (!($res = mysql_query($sql, $this->conn))) - { - $this->sql_errcheck($sql); - } - if ($row = mysql_fetch_array($res)) - { - $rs = $row; - } - else - { - $rs = false; - } - mysql_free_result($res); - return $rs; - } - - - /** - * Perform a query, return the first column of the first row - * - * @param string sql - * @param vararg arguments (for sprintf) - * @return mixed - */ - protected function 
query_one ( $sql ) - { - $sql = $this->sql_printf(func_get_args()); - if (!($res = mysql_query($sql, $this->conn))) - { - $this->sql_errcheck($sql); - } - $val = @mysql_result($res, 0, 0); - mysql_free_result($res); - return $val; - } - - - /** - * Return the number of rows affected in the last query - */ - protected function query_affected_rows () - { - return mysql_affected_rows($this->conn); - } - - - /** - * Return the id of the last inserted row - * - * @return int - */ - protected function query_insert_id () - { - return mysql_insert_id($this->conn); - } - - - protected function sql_printf ( $args ) - { - $sql = array_shift($args); - if (count($args) == 1 && is_array($args[0])) - { - $args = $args[0]; - } - $args = array_map(array($this, 'sql_escape_string'), $args); - return vsprintf($sql, $args); - } - - - protected function sql_escape_string ( $s ) - { - if (is_string($s)) - { - return mysql_real_escape_string($s, $this->conn); - } - else if (is_null($s)) - { - return NULL; - } - else if (is_bool($s)) - { - return intval($s); - } - else if (is_int($s) || is_float($s)) - { - return $s; - } - else - { - return mysql_real_escape_string(strval($s), $this->conn); - } - } - - - protected function sql_errcheck ( $sql ) - { - if (mysql_errno($this->conn)) - { - $msg = "SQL Error in OAuthStoreMySQL: ".mysql_error($this->conn)."\n\n" . 
$sql; - throw new OAuthException($msg); - } - } -} - - -/* vi:set ts=4 sts=4 sw=4 binary noeol: */ - -?> \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/store/mysql/install.php b/mod/oauth_api/vendors/oauth/library/store/mysql/install.php deleted file mode 100644 index 0015da5e3..000000000 --- a/mod/oauth_api/vendors/oauth/library/store/mysql/install.php +++ /dev/null @@ -1,32 +0,0 @@ - \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/library/store/mysql/mysql.sql b/mod/oauth_api/vendors/oauth/library/store/mysql/mysql.sql deleted file mode 100644 index d652a1c99..000000000 --- a/mod/oauth_api/vendors/oauth/library/store/mysql/mysql.sql +++ /dev/null @@ -1,219 +0,0 @@ -# Datamodel for OAuthStoreMySQL -# -# You need to add the foreign key constraints for the user ids your are using. -# I have commented the constraints out, just look for 'usa_id_ref' to enable them. -# -# The --SPLIT-- markers are used by the install.php script -# -# @version $Id: mysql.sql 51 2008-10-15 15:15:47Z marcw@pobox.com $ -# @author Marc Worrell -# - -# Changes: -# -# 2008-10-15 (on r48) Added ttl to consumer and server tokens, added named server tokens -# -# ALTER TABLE oauth_server_token -# ADD ost_token_ttl datetime not null default '9999-12-31', -# ADD KEY (ost_token_ttl); -# -# ALTER TABLE oauth_consumer_token -# ADD oct_name varchar(64) binary not null default '', -# ADD oct_token_ttl datetime not null default '9999-12-31', -# DROP KEY oct_usa_id_ref, -# ADD UNIQUE KEY (oct_usa_id_ref, oct_ocr_id_ref, oct_token_type, oct_name), -# ADD KEY (oct_token_ttl); -# -# 2008-09-09 (on r5) Added referrer host to server access token -# -# ALTER TABLE oauth_server_token ADD ost_referrer_host VARCHAR(128) NOT NULL; -# - - -# -# Log table to hold all OAuth request when you enabled logging -# - -CREATE TABLE IF NOT EXISTS oauth_log ( - olg_id int(11) not null auto_increment, - olg_osr_consumer_key varchar(64) binary, - olg_ost_token varchar(64) 
binary, - olg_ocr_consumer_key varchar(64) binary, - olg_oct_token varchar(64) binary, - olg_usa_id_ref int(11), - olg_received text not null, - olg_sent text not null, - olg_base_string text not null, - olg_notes text not null, - olg_timestamp timestamp not null default current_timestamp, - olg_remote_ip bigint not null, - - primary key (olg_id), - key (olg_osr_consumer_key, olg_id), - key (olg_ost_token, olg_id), - key (olg_ocr_consumer_key, olg_id), - key (olg_oct_token, olg_id), - key (olg_usa_id_ref, olg_id) - -# , foreign key (olg_usa_id_ref) references any_user_auth (usa_id_ref) -# on update cascade -# on delete cascade -) engine=InnoDB default charset=utf8; - -#--SPLIT-- - -# -# /////////////////// CONSUMER SIDE /////////////////// -# - -# This is a registry of all consumer codes we got from other servers -# The consumer_key/secret is obtained from the server -# We also register the server uri, so that we can find the consumer key and secret -# for a certain server. From that server we can check if we have a token for a -# particular user. 
- -CREATE TABLE IF NOT EXISTS oauth_consumer_registry ( - ocr_id int(11) not null auto_increment, - ocr_usa_id_ref int(11), - ocr_consumer_key varchar(64) binary not null, - ocr_consumer_secret varchar(64) binary not null, - ocr_signature_methods varchar(255) not null default 'HMAC-SHA1,PLAINTEXT', - ocr_server_uri varchar(255) not null, - ocr_server_uri_host varchar(128) not null, - ocr_server_uri_path varchar(128) binary not null, - - ocr_request_token_uri varchar(255) not null, - ocr_authorize_uri varchar(255) not null, - ocr_access_token_uri varchar(255) not null, - ocr_timestamp timestamp not null default current_timestamp, - - primary key (ocr_id), - unique key (ocr_consumer_key, ocr_usa_id_ref), - key (ocr_server_uri), - key (ocr_server_uri_host, ocr_server_uri_path), - key (ocr_usa_id_ref) - -# , foreign key (ocr_usa_id_ref) references any_user_auth(usa_id_ref) -# on update cascade -# on delete set null -) engine=InnoDB default charset=utf8; - -#--SPLIT-- - -# Table used to sign requests for sending to a server by the consumer -# The key is defined for a particular user. 
Only one single named -# key is allowed per user/server combination - -CREATE TABLE IF NOT EXISTS oauth_consumer_token ( - oct_id int(11) not null auto_increment, - oct_ocr_id_ref int(11) not null, - oct_usa_id_ref int(11) not null, - oct_name varchar(64) binary not null default '', - oct_token varchar(64) binary not null, - oct_token_secret varchar(64) binary not null, - oct_token_type enum('request','authorized','access'), - oct_token_ttl datetime not null default '9999-12-31', - oct_timestamp timestamp not null default current_timestamp, - - primary key (oct_id), - unique key (oct_ocr_id_ref, oct_token), - unique key (oct_usa_id_ref, oct_ocr_id_ref, oct_token_type, oct_name), - key (oct_token_ttl), - - foreign key (oct_ocr_id_ref) references oauth_consumer_registry (ocr_id) - on update cascade - on delete cascade - -# , foreign key (oct_usa_id_ref) references any_user_auth (usa_id_ref) -# on update cascade -# on delete cascade -) engine=InnoDB default charset=utf8; - -#--SPLIT-- - - -# -# ////////////////// SERVER SIDE ///////////////// -# - -# Table holding consumer key/secret combos an user issued to consumers. -# Used for verification of incoming requests. 
- -CREATE TABLE IF NOT EXISTS oauth_server_registry ( - osr_id int(11) not null auto_increment, - osr_usa_id_ref int(11), - osr_consumer_key varchar(64) binary not null, - osr_consumer_secret varchar(64) binary not null, - osr_enabled tinyint(1) not null default '1', - osr_status varchar(16) not null, - osr_requester_name varchar(64) not null, - osr_requester_email varchar(64) not null, - osr_callback_uri varchar(255) not null, - osr_application_uri varchar(255) not null, - osr_application_title varchar(80) not null, - osr_application_descr text not null, - osr_application_notes text not null, - osr_application_type varchar(20) not null, - osr_application_commercial tinyint(1) not null default '0', - osr_issue_date datetime not null, - osr_timestamp timestamp not null default current_timestamp, - - primary key (osr_id), - unique key (osr_consumer_key), - key (osr_usa_id_ref) - -# , foreign key (osr_usa_id_ref) references any_user_auth(usa_id_ref) -# on update cascade -# on delete set null -) engine=InnoDB default charset=utf8; - -#--SPLIT-- - -# Nonce used by a certain consumer, every used nonce should be unique, this prevents -# replaying attacks. We need to store all timestamp/nonce combinations for the -# maximum timestamp received. - -CREATE TABLE IF NOT EXISTS oauth_server_nonce ( - osn_id int(11) not null auto_increment, - osn_consumer_key varchar(64) binary not null, - osn_token varchar(64) binary not null, - osn_timestamp bigint not null, - osn_nonce varchar(80) binary not null, - - primary key (osn_id), - unique key (osn_consumer_key, osn_token, osn_timestamp, osn_nonce) -) engine=InnoDB default charset=utf8; - -#--SPLIT-- - -# Table used to verify signed requests sent to a server by the consumer -# When the verification is succesful then the associated user id is returned. 
- -CREATE TABLE IF NOT EXISTS oauth_server_token ( - ost_id int(11) not null auto_increment, - ost_osr_id_ref int(11) not null, - ost_usa_id_ref int(11) not null, - ost_token varchar(64) binary not null, - ost_token_secret varchar(64) binary not null, - ost_token_type enum('request','access'), - ost_authorized tinyint(1) not null default '0', - ost_referrer_host varchar(128) not null, - ost_token_ttl datetime not null default '9999-12-31', - ost_timestamp timestamp not null default current_timestamp, - - primary key (ost_id), - unique key (ost_token), - key (ost_osr_id_ref), - key (ost_token_ttl), - - foreign key (ost_osr_id_ref) references oauth_server_registry (osr_id) - on update cascade - on delete cascade - -# , foreign key (ost_usa_id_ref) references any_user_auth (usa_id_ref) -# on update cascade -# on delete cascade -) engine=InnoDB default charset=utf8; - - - diff --git a/mod/oauth_api/vendors/oauth/test/discovery/xrds-fireeagle.xrds b/mod/oauth_api/vendors/oauth/test/discovery/xrds-fireeagle.xrds deleted file mode 100644 index 0f5eba222..000000000 --- a/mod/oauth_api/vendors/oauth/test/discovery/xrds-fireeagle.xrds +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - - xri://$xrds*simple - 2008-04-15T00:25:30-07:00 - - - - http://oauth.net/core/1.0/endpoint/request - - http://oauth.net/core/1.0/parameters/auth-header - http://oauth.net/core/1.0/parameters/post-body - http://oauth.net/core/1.0/parameters/uri-query - http://oauth.net/core/1.0/signature/HMAC-SHA1 - http://oauth.net/core/1.0/signature/PLAINTEXT - - https://fireeagle.yahooapis.com/oauth/request_token - - - - - http://oauth.net/core/1.0/endpoint/authorize - - http://oauth.net/core/1.0/parameters/auth-header - http://oauth.net/core/1.0/parameters/uri-query - - https://fireeagle.yahooapis.com/oauth/access_token - - - - - http://oauth.net/core/1.0/endpoint/access - - http://oauth.net/core/1.0/parameters/auth-header - http://oauth.net/core/1.0/parameters/post-body - 
http://oauth.net/core/1.0/parameters/uri-query - http://oauth.net/core/1.0/signature/HMAC-SHA1 - http://oauth.net/core/1.0/signature/PLAINTEXT - - http://fireeagle.yahoo.net/oauth/authorize - - - - - http://oauth.net/core/1.0/endpoint/resource - - http://oauth.net/core/1.0/parameters/auth-header - http://oauth.net/core/1.0/parameters/post-body - http://oauth.net/core/1.0/parameters/uri-query - http://oauth.net/core/1.0/signature/HMAC-SHA1 - http://oauth.net/core/1.0/signature/PLAINTEXT - - - - - - - http://oauth.net/discovery/1.0/consumer-identity/oob - https://fireeagle.yahoo.net/developer/create - - - - - - - xri://$xrds*simple - - - - http://oauth.net/discovery/1.0 - #oauth - - - - \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/test/discovery/xrds-getsatisfaction.xrds b/mod/oauth_api/vendors/oauth/test/discovery/xrds-getsatisfaction.xrds deleted file mode 100644 index ab94b5bea..000000000 --- a/mod/oauth_api/vendors/oauth/test/discovery/xrds-getsatisfaction.xrds +++ /dev/null @@ -1,73 +0,0 @@ - - - - - xri://$xrds*simple - 2008-04-30T23:59:59Z - - - - http://oauth.net/core/1.0/endpoint/request - - http://oauth.net/core/1.0/parameters/auth-header - http://oauth.net/core/1.0/signature/HMAC-SHA1 - - http://getsatisfaction.com/api/request_token - - - - http://oauth.net/core/1.0/endpoint/authorize - - http://oauth.net/core/1.0/parameters/uri-query - - http://getsatisfaction.com/api/authorize - - - - - http://oauth.net/core/1.0/endpoint/access - - http://oauth.net/core/1.0/parameters/auth-header - http://oauth.net/core/1.0/signature/HMAC-SHA1 - - http://getsatisfaction.com/api/access_token - - - - - - http://oauth.net/core/1.0/endpoint/resource - - http://oauth.net/core/1.0/parameters/auth-header - http://oauth.net/core/1.0/signature/HMAC-SHA1 - - - - - - - http://oauth.net/discovery/1.0/consumer-identity/oob - http://getsatisfaction.com/me/extensions/new - - - - - - - xri://$xrds*simple - - - - http://oauth.net/discovery/1.0 - #oauth - - - - \ 
No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/test/discovery/xrds-magnolia.xrds b/mod/oauth_api/vendors/oauth/test/discovery/xrds-magnolia.xrds deleted file mode 100644 index 361b5c9a1..000000000 --- a/mod/oauth_api/vendors/oauth/test/discovery/xrds-magnolia.xrds +++ /dev/null @@ -1,81 +0,0 @@ - - - - - - - xri://$xrds*simple - 2008-04-13T07:34:58Z - - - - http://oauth.net/core/1.0/endpoint/request - - http://oauth.net/core/1.0/parameters/auth-header - http://oauth.net/core/1.0/parameters/post-body - http://oauth.net/core/1.0/parameters/uri-query - http://oauth.net/core/1.0/signature/HMAC-SHA1 - http://oauth.net/core/1.0/signature/RSA-SHA1 - http://oauth.net/core/1.0/signature/PLAINTEXT - - https://ma.gnolia.com/oauth/get_request_token - - - - - http://oauth.net/core/1.0/endpoint/authorize - - http://oauth.net/core/1.0/parameters/auth-header - http://oauth.net/core/1.0/parameters/uri-query - - https://ma.gnolia.com/oauth/authorize - http://ma.gnolia.com/oauth/authorize - - - - - http://oauth.net/core/1.0/endpoint/access - - http://oauth.net/core/1.0/parameters/auth-header - http://oauth.net/core/1.0/parameters/post-body - http://oauth.net/core/1.0/parameters/uri-query - http://oauth.net/core/1.0/signature/HMAC-SHA1 - http://oauth.net/core/1.0/signature/RSA-SHA1 - http://oauth.net/core/1.0/signature/PLAINTEXT - - https://ma.gnolia.com/oauth/get_access_token - - - - - http://oauth.net/core/1.0/endpoint/resource - - http://oauth.net/core/1.0/parameters/auth-header - http://oauth.net/core/1.0/parameters/post-body - http://oauth.net/core/1.0/parameters/uri-query - http://oauth.net/core/1.0/signature/HMAC-SHA1 - http://oauth.net/core/1.0/signature/RSA-SHA1 - - - - - - - http://oauth.net/discovery/1.0/consumer-identity/oob - http://ma.gnolia.com/applications/new - - - - - - - xri://$xrds*simple - - - - http://oauth.net/discovery/1.0 - #oauth - - - - \ No newline at end of file diff --git a/mod/oauth_api/vendors/oauth/test/oauth_test.php 
b/mod/oauth_api/vendors/oauth/test/oauth_test.php deleted file mode 100644 index 0c0504c70..000000000 --- a/mod/oauth_api/vendors/oauth/test/oauth_test.php +++ /dev/null @@ -1,188 +0,0 @@ - - * @date Nov 29, 2007 3:46:56 PM - * @see http://wiki.oauth.net/TestCases - * - * The MIT License - * - * Copyright (c) 2007-2008 Mediamatic Lab - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -require_once dirname(__FILE__) . '/../library/OAuthRequest.php'; -require_once dirname(__FILE__) . '/../library/OAuthRequester.php'; -require_once dirname(__FILE__) . '/../library/OAuthRequestSigner.php'; -require_once dirname(__FILE__) . 
'/../library/OAuthRequestVerifier.php'; - -if (!function_exists('getallheaders')) -{ - function getallheaders() - { - return array(); - } -} - - -oauth_test(); - -function oauth_test () -{ - error_reporting(E_ALL); - - header('Content-Type: text/plain; charset=utf-8'); - - echo "Performing OAuth module tests.\n\n"; - echo "See also: http://wiki.oauth.net/TestCases\n\n"; - - assert_options(ASSERT_CALLBACK, 'oauth_assert_handler'); - assert_options(ASSERT_WARNING, 0); - - $req = new OAuthRequest('http://www.example.com', 'GET'); - - echo "***** Parameter Encoding *****\n\n"; - - assert('$req->urlencode(\'abcABC123\') == \'abcABC123\''); - assert('$req->urlencode(\'-._~\') == \'-._~\''); - assert('$req->urlencode(\'%\') == \'%25\''); - assert('$req->urlencode(\'&=*\') == \'%26%3D%2A\''); - assert('$req->urlencode(\'&=*\') == \'%26%3D%2A\''); - assert('$req->urlencode("\n") == \'%0A\''); - assert('$req->urlencode(" ") == \'%20\''); - assert('$req->urlencode("\x7f") == \'%7F\''); - - - echo "***** Normalize Request Parameters *****\n\n"; - - $req = new OAuthRequest('http://example.com/?name', 'GET'); - assert('$req->getNormalizedParams() == \'name=\''); - - $req = new OAuthRequest('http://example.com/?a=b', 'GET'); - assert('$req->getNormalizedParams() == \'a=b\''); - - $req = new OAuthRequest('http://example.com/?a=b&c=d', 'GET'); - assert('$req->getNormalizedParams() == \'a=b&c=d\''); - - // At this moment we don't support two parameters with the same name - // so I changed this test case to "a=" and "b=" and not "a=" and "a=" - $req = new OAuthRequest('http://example.com/?b=x!y&a=x+y', 'GET'); - assert('$req->getNormalizedParams() == \'a=x%20y&b=x%21y\''); - - $req = new OAuthRequest('http://example.com/?x!y=a&x=a', 'GET'); - assert('$req->getNormalizedParams() == \'x=a&x%21y=a\''); - - - echo "***** Base String *****\n\n"; - - $req = new OAuthRequest('http://example.com/?n=v', 'GET'); - assert('$req->signatureBaseString() == 
\'GET&http%3A%2F%2Fexample.com%2F&n%3Dv\''); - - $req = new OAuthRequest( - 'https://photos.example.net/request_token', - 'POST', - 'oauth_version=1.0&oauth_consumer_key=dpf43f3p2l4k3l03&oauth_timestamp=1191242090&oauth_nonce=hsu94j3884jdopsl&oauth_signature_method=PLAINTEXT&oauth_signature=ignored', - array('X-OAuth-Test' => true)); - assert('$req->signatureBaseString() == \'POST&https%3A%2F%2Fphotos.example.net%2Frequest_token&oauth_consumer_key%3Ddpf43f3p2l4k3l03%26oauth_nonce%3Dhsu94j3884jdopsl%26oauth_signature_method%3DPLAINTEXT%26oauth_timestamp%3D1191242090%26oauth_version%3D1.0\''); - - $req = new OAuthRequest( - 'http://photos.example.net/photos?file=vacation.jpg&size=original&oauth_version=1.0&oauth_consumer_key=dpf43f3p2l4k3l03&oauth_token=nnch734d00sl2jdk&oauth_timestamp=1191242096&oauth_nonce=kllo9940pd9333jh&oauth_signature=ignored&oauth_signature_method=HMAC-SHA1', - 'GET'); - assert('$req->signatureBaseString() == \'GET&http%3A%2F%2Fphotos.example.net%2Fphotos&file%3Dvacation.jpg%26oauth_consumer_key%3Ddpf43f3p2l4k3l03%26oauth_nonce%3Dkllo9940pd9333jh%26oauth_signature_method%3DHMAC-SHA1%26oauth_timestamp%3D1191242096%26oauth_token%3Dnnch734d00sl2jdk%26oauth_version%3D1.0%26size%3Doriginal\''); - - - echo "***** HMAC-SHA1 *****\nRequest signing\n"; - - OAuthStore::instance('MySQL', array('conn'=>false)); - $req = new OAuthRequestSigner('http://photos.example.net/photos?file=vacation.jpg&size=original', 'GET'); - - assert('$req->urldecode($req->calculateDataSignature(\'bs\', \'cs\', \'\', \'HMAC-SHA1\')) == \'egQqG5AJep5sJ7anhXju1unge2I=\''); - assert('$req->urldecode($req->calculateDataSignature(\'bs\', \'cs\', \'ts\', \'HMAC-SHA1\')) == \'VZVjXceV7JgPq/dOTnNmEfO0Fv8=\''); - - $secrets = array( - 'consumer_key' => 'dpf43f3p2l4k3l03', - 'consumer_secret' => 'kd94hf93k423kf44', - 'token' => 'nnch734d00sl2jdk', - 'token_secret' => 'pfkkdhi9sl3r4s00', - 'signature_methods' => array('HMAC-SHA1'), - 'nonce' => 'kllo9940pd9333jh', - 'timestamp' => 
'1191242096' - ); - $req->sign(0, $secrets); - assert('$req->getParam(\'oauth_signature\', true) == \'tR3+Ty81lMeYAr/Fid0kMTYa/WM=\''); - - echo "***** HMAC-SHA1 *****\nRequest verification\n"; - - $req = new OAuthRequestVerifier( - 'http://photos.example.net/photos?file=vacation.jpg&size=original' - .'&oauth_consumer_key=dpf43f3p2l4k3l03&oauth_token=nnch734d00sl2jdk' - .'&oauth_signature_method=HMAC-SHA1&oauth_nonce=kllo9940pd9333jh' - .'&oauth_timestamp=1191242096&oauth_version=1.0' - .'&oauth_signature='.rawurlencode('tR3+Ty81lMeYAr/Fid0kMTYa/WM=') - , 'GET'); - - $req->verifySignature('kd94hf93k423kf44', 'pfkkdhi9sl3r4s00'); - - echo "\n"; - echo "***** Yahoo! test case ******\n\n"; - - OAuthStore::instance('MySQL', array('conn'=>false)); - $req = new OAuthRequestSigner('http://example.com:80/photo', 'GET'); - - $req->setParam('title', 'taken with a 30% orange filter'); - $req->setParam('file', 'mountain & water view'); - $req->setParam('format', 'jpeg'); - $req->setParam('include', array('date','aperture')); - - $secrets = array( - 'consumer_key' => '1234=asdf=4567', - 'consumer_secret' => 'erks823*43=asd&123ls%23', - 'token' => 'asdf-4354=asew-5698', - 'token_secret' => 'dis9$#$Js009%==', - 'signature_methods' => array('HMAC-SHA1'), - 'nonce' => '3jd834jd9', - 'timestamp' => '12303202302' - ); - $req->sign(0, $secrets); - - // echo "Basestring:\n",$req->signatureBaseString(), "\n\n"; - - //echo "queryString:\n",$req->getQueryString(), "\n\n"; - assert('$req->getQueryString() == \'title=taken%20with%20a%2030%25%20orange%20filter&file=mountain%20%26%20water%20view&format=jpeg&include=date&include=aperture\''); - - //echo "oauth_signature:\n",$req->getParam('oauth_signature', true),"\n\n"; - assert('$req->getParam(\'oauth_signature\', true) == \'jMdUSR1vOr3SzNv3gZ5DDDuGirA=\''); - - echo "\n\nFinished.\n"; -} - - -function oauth_assert_handler ( $file, $line, $code ) -{ - echo "\nAssertion failed in $file:$line - $code\n\n"; -} - -/* vi:set ts=4 sts=4 sw=4 
binary noeol: */ - -?> \ No newline at end of file -- cgit v1.2.3 From 6c7ec418fc0a5b35489614325c807d6f523844df Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Mon, 15 Apr 2013 21:03:00 -0400 Subject: Fixes #5363: Append trailing slash to site URL if missing --- actions/admin/site/update_advanced.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/actions/admin/site/update_advanced.php b/actions/admin/site/update_advanced.php index 0fd8d1f35..4888b0a8d 100644 --- a/actions/admin/site/update_advanced.php +++ b/actions/admin/site/update_advanced.php @@ -14,10 +14,10 @@ if ($site = elgg_get_site_entity()) { throw new InstallationException(elgg_echo('InvalidParameterException:NonElggSite')); } - $site->url = get_input('wwwroot'); + $site->url = rtrim(get_input('wwwroot', '', false), '/') . '/'; - datalist_set('path', sanitise_filepath(get_input('path'))); - $dataroot = sanitise_filepath(get_input('dataroot')); + datalist_set('path', sanitise_filepath(get_input('path', '', false))); + $dataroot = sanitise_filepath(get_input('dataroot', '', false)); // check for relative paths if (stripos(PHP_OS, 'win') === 0) { -- cgit v1.2.3 From 16297373c2e91b6af32102a809b75326a44eb7f3 Mon Sep 17 00:00:00 2001 From: cash Date: Tue, 16 Apr 2013 19:19:08 -0400 Subject: fixed display issues on interstitial twitter page --- mod/twitter_api/pages/twitter_api/interstitial.php | 4 +--- .../views/default/forms/twitter_api/interstitial_settings.php | 7 ++++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/mod/twitter_api/pages/twitter_api/interstitial.php b/mod/twitter_api/pages/twitter_api/interstitial.php index d1f1ac20c..23b5069cb 100644 --- a/mod/twitter_api/pages/twitter_api/interstitial.php +++ b/mod/twitter_api/pages/twitter_api/interstitial.php @@ -8,9 +8,7 @@ $title = elgg_echo('twitter_api:interstitial:settings'); -$site = get_config('site'); -$content = elgg_echo('twitter_api:interstitial:description', array($site->name)); -$content .= 
elgg_view_form('twitter_api/interstitial_settings'); +$content = elgg_view_form('twitter_api/interstitial_settings'); $params = array( 'content' => $content, diff --git a/mod/twitter_api/views/default/forms/twitter_api/interstitial_settings.php b/mod/twitter_api/views/default/forms/twitter_api/interstitial_settings.php index cad2be345..b4882bb7f 100644 --- a/mod/twitter_api/views/default/forms/twitter_api/interstitial_settings.php +++ b/mod/twitter_api/views/default/forms/twitter_api/interstitial_settings.php @@ -3,6 +3,11 @@ * Make the user set up some alternative ways to login. */ +echo '
    '; +$site = get_config('site'); +echo elgg_echo('twitter_api:interstitial:description', array($site->name)); +echo '
    '; + $user = elgg_get_logged_in_user_entity(); if (elgg_is_sticky_form('twitter_api_interstitial')) { @@ -51,7 +56,7 @@ echo elgg_view_module('info', $title, $body); // buttons echo elgg_view('input/submit', array( - 'text' => elgg_echo('save') + 'value' => elgg_echo('save') )); echo elgg_view('output/url', array( -- cgit v1.2.3 From 21d8d10c3a3e974f1dfc110ad55e09a1d4db98c9 Mon Sep 17 00:00:00 2001 From: cash Date: Tue, 16 Apr 2013 19:27:18 -0400 Subject: Fixes #5362 correctly getting user on user plugin settings page --- engine/lib/user_settings.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/lib/user_settings.php b/engine/lib/user_settings.php index 3466c25f9..0e36dc46d 100644 --- a/engine/lib/user_settings.php +++ b/engine/lib/user_settings.php @@ -308,7 +308,7 @@ function usersettings_page_handler($page) { $user = get_user_by_username($page[1]); elgg_set_page_owner_guid($user->guid); } else { - $user = elgg_get_logged_in_user_guid(); + $user = elgg_get_logged_in_user_entity(); elgg_set_page_owner_guid($user->guid); } -- cgit v1.2.3 From f1d0a8e62f25002644c7b0394d5a8940d4ac7e5b Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Fri, 19 Apr 2013 08:05:25 -0400 Subject: Fixes #5367 returning all groups that a user belongs to - need to deprecate this function --- engine/lib/group.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/engine/lib/group.php b/engine/lib/group.php index 624029d98..359bc59c2 100644 --- a/engine/lib/group.php +++ b/engine/lib/group.php @@ -242,7 +242,8 @@ function get_users_membership($user_guid) { $options = array( 'relationship' => 'member', 'relationship_guid' => $user_guid, - 'inverse_relationship' => FALSE + 'inverse_relationship' => false, + 'limit' => false, ); return elgg_get_entities_from_relationship($options); } -- cgit v1.2.3 From c4e17030d2041963a11f86abee584c04ee7ccd82 Mon Sep 17 00:00:00 2001 From: cash Date: Fri, 19 Apr 2013 09:34:51 -0400 Subject: Fixes #5373 easier to 
access URL on reported content --- .../views/default/object/reported_content.php | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/mod/reportedcontent/views/default/object/reported_content.php b/mod/reportedcontent/views/default/object/reported_content.php index 0e733e154..cc33f54fb 100644 --- a/mod/reportedcontent/views/default/object/reported_content.php +++ b/mod/reportedcontent/views/default/object/reported_content.php @@ -57,16 +57,6 @@ if ($report->state == 'archived') {

    : title; ?> -
    - "#report-$report->guid", - 'text' => elgg_echo('reportedcontent:moreinfo'), - 'rel' => "toggle", - )); - ?> -

    - - + 0) { foreach ($profile_fields as $shortname => $valtype) { @@ -40,6 +42,14 @@ if (is_array($profile_fields) && count($profile_fields) > 0) { $access_id = ACCESS_DEFAULT; } + //sticky form values take precedence over saved ones + if (isset($stickyValues[$shortname])) { + $value = $stickyValues[$shortname]; + } + if (isset($stickyValues['accesslevel'][$shortname])) { + $access_id = $stickyValues['accesslevel'][$shortname]; + } + ?>
    -- cgit v1.2.3 From 666b214e94b80b713797710d04d06d3f11271ff0 Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Fri, 7 Jun 2013 18:26:18 -0400 Subject: clearing sticky form after it is used on edit form --- actions/profile/edit.php | 2 +- views/default/forms/profile/edit.php | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/actions/profile/edit.php b/actions/profile/edit.php index c2a124309..e1f066e82 100644 --- a/actions/profile/edit.php +++ b/actions/profile/edit.php @@ -82,7 +82,7 @@ if (sizeof($input) > 0) { ); elgg_delete_metadata($options); - if(!is_null($value) && ($value !== '')){ + if (!is_null($value) && ($value !== '')) { // only create metadata for non empty values (0 is allowed) to prevent metadata records with empty string values #4858 if (isset($accesslevel[$shortname])) { diff --git a/views/default/forms/profile/edit.php b/views/default/forms/profile/edit.php index aef180f36..cb0a37ca4 100644 --- a/views/default/forms/profile/edit.php +++ b/views/default/forms/profile/edit.php @@ -13,7 +13,7 @@
    0) { @@ -42,12 +42,12 @@ if (is_array($profile_fields) && count($profile_fields) > 0) { $access_id = ACCESS_DEFAULT; } - //sticky form values take precedence over saved ones - if (isset($stickyValues[$shortname])) { - $value = $stickyValues[$shortname]; + // sticky form values take precedence over saved ones + if (isset($sticky_values[$shortname])) { + $value = $sticky_values[$shortname]; } - if (isset($stickyValues['accesslevel'][$shortname])) { - $access_id = $stickyValues['accesslevel'][$shortname]; + if (isset($sticky_values['accesslevel'][$shortname])) { + $access_id = $sticky_values['accesslevel'][$shortname]; } ?> @@ -69,6 +69,9 @@ if (is_array($profile_fields) && count($profile_fields) > 0) {
    Date: Sun, 9 Jun 2013 01:48:07 +0200 Subject: Refs #5594 - Clearer JS implementation --- views/default/core/walled_garden/login.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/views/default/core/walled_garden/login.php b/views/default/core/walled_garden/login.php index db9ce3310..54af90f67 100644 --- a/views/default/core/walled_garden/login.php +++ b/views/default/core/walled_garden/login.php @@ -34,7 +34,7 @@ if (elgg_is_sticky_form('register')) { ?> Date: Sat, 8 Jun 2013 21:12:03 -0400 Subject: ElggBatch with incrementOffset off now handles incomplete entities --- engine/classes/ElggBatch.php | 36 +++++++++++++++++++-------- engine/tests/api/helpers.php | 58 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 80 insertions(+), 14 deletions(-) diff --git a/engine/classes/ElggBatch.php b/engine/classes/ElggBatch.php index 83963ccee..ac79cf084 100644 --- a/engine/classes/ElggBatch.php +++ b/engine/classes/ElggBatch.php @@ -242,9 +242,12 @@ class ElggBatch /** * Fetches the next chunk of results * + * @param int $num_incompletes_last_fetch When called recursively, this is the number of + * incomplete entities returned in the last fetch. + * * @return bool */ - private function getNextResultsChunk() { + private function getNextResultsChunk($num_incompletes_last_fetch = 0) { // always reset results. 
$this->results = array(); @@ -278,7 +281,7 @@ class ElggBatch if ($this->incrementOffset) { $offset = $this->offset + $this->retrievedResults; } else { - $offset = $this->offset; + $offset = $this->offset + $num_incompletes_last_fetch; } $current_options = array( @@ -292,17 +295,30 @@ class ElggBatch $this->incompleteEntities = array(); $this->results = call_user_func_array($this->getter, array($options)); - // If there were incomplete entities, we pretend they were at the beginning of the results, - // fool the local counter to think it's skipped by them already, and update the running - // total as if the results contained the incompletes. - if ($this->results || $this->incompleteEntities) { + $num_results = count($this->results); + $num_incomplete = count($this->incompleteEntities); + + if ($this->incompleteEntities) { + // pad the front of the results with nulls representing the incompletes + array_splice($this->results, 0, 0, array_pad(array(), $num_incomplete, null)); + // ...and skip past them + reset($this->results); + for ($i = 0; $i < $num_incomplete; $i++) { + next($this->results); + } + } + + if ($this->results) { $this->chunkIndex++; - $this->resultIndex = count($this->incompleteEntities); - $this->retrievedResults += (count($this->results) + count($this->incompleteEntities)); - if (!$this->results) { + + // let the system know we've jumped past the nulls + $this->resultIndex = $num_incomplete; + + $this->retrievedResults += ($num_results + $num_incomplete); + if ($num_results == 0) { // This fetch was *all* incompletes! We need to fetch until we can either // offer at least one row to iterate over, or give up. 
- return $this->getNextResultsChunk(); + return $this->getNextResultsChunk($num_incomplete); } return true; } else { diff --git a/engine/tests/api/helpers.php b/engine/tests/api/helpers.php index 43244636b..06ef55138 100644 --- a/engine/tests/api/helpers.php +++ b/engine/tests/api/helpers.php @@ -578,16 +578,14 @@ class ElggCoreHelpersTest extends ElggCoreUnitTest { $this->assertEqual(11, $j); } - public function testElggBatchHandlesBrokenEntities() { + public function testElggBatchReadHandlesBrokenEntities() { $num_test_entities = 6; $guids = array(); - $now = time(); for ($i = $num_test_entities; $i > 0; $i--) { $entity = new ElggObject(); $entity->type = 'object'; $entity->subtype = 'test_5357_subtype'; $entity->access_id = ACCESS_PUBLIC; - $entity->time_created = ($now - $i); $entity->save(); $guids[] = $entity->guid; _elgg_invalidate_cache_for_entity($entity->guid); @@ -604,11 +602,12 @@ class ElggCoreHelpersTest extends ElggCoreUnitTest { $options = array( 'type' => 'object', 'subtype' => 'test_5357_subtype', - 'order_by' => 'e.time_created ASC', + 'order_by' => 'e.guid', ); $entities_visited = array(); $batch = new ElggBatch('elgg_get_entities', $options, null, 2); + /* @var ElggEntity[] $batch */ foreach ($batch as $entity) { $entities_visited[] = $entity->guid; } @@ -629,6 +628,57 @@ class ElggCoreHelpersTest extends ElggCoreUnitTest { delete_data("DELETE FROM {$db_prefix}objects_entity WHERE guid IN (" . implode(',', $guids) . ")"); } + public function testElggBatchDeleteHandlesBrokenEntities() { + $num_test_entities = 6; + $guids = array(); + for ($i = $num_test_entities; $i > 0; $i--) { + $entity = new ElggObject(); + $entity->type = 'object'; + $entity->subtype = 'test_5357_subtype'; + $entity->access_id = ACCESS_PUBLIC; + $entity->save(); + $guids[] = $entity->guid; + _elgg_invalidate_cache_for_entity($entity->guid); + } + + // break entities such that the first fetch has one incomplete + // and the second fetch has only incompletes! 
+ $db_prefix = elgg_get_config('dbprefix'); + delete_data(" + DELETE FROM {$db_prefix}objects_entity + WHERE guid IN ({$guids[1]}, {$guids[2]}, {$guids[3]}) + "); + + $options = array( + 'type' => 'object', + 'subtype' => 'test_5357_subtype', + 'order_by' => 'e.guid', + ); + + $entities_visited = array(); + $batch = new ElggBatch('elgg_get_entities', $options, null, 2, false); + /* @var ElggEntity[] $batch */ + foreach ($batch as $entity) { + $entities_visited[] = $entity->guid; + $entity->delete(); + } + + // The broken entities should not have been visited + $this->assertEqual($entities_visited, array($guids[0], $guids[4], $guids[5])); + + // cleanup (including leftovers from previous tests) + $entity_rows = elgg_get_entities(array_merge($options, array( + 'callback' => '', + 'limit' => false, + ))); + $guids = array(); + foreach ($entity_rows as $row) { + $guids[] = $row->guid; + } + delete_data("DELETE FROM {$db_prefix}entities WHERE guid IN (" . implode(',', $guids) . ")"); + delete_data("DELETE FROM {$db_prefix}objects_entity WHERE guid IN (" . implode(',', $guids) . 
")"); + } + static function elgg_batch_callback_test($options, $reset = false) { static $count = 1; -- cgit v1.2.3 From 8a47b2342e53c9cdf3093982486b19d6cc2f3e9b Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Sat, 8 Jun 2013 21:34:11 -0400 Subject: Improved algorithm by tracking total incomplete entities --- engine/classes/ElggBatch.php | 18 ++++++++++++------ engine/tests/api/helpers.php | 16 ++++++++-------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/engine/classes/ElggBatch.php b/engine/classes/ElggBatch.php index ac79cf084..d810ea066 100644 --- a/engine/classes/ElggBatch.php +++ b/engine/classes/ElggBatch.php @@ -156,6 +156,13 @@ class ElggBatch */ private $incompleteEntities = array(); + /** + * Total number of incomplete entities fetched + * + * @var int + */ + private $totalIncompletes = 0; + /** * Batches operations on any elgg_get_*() or compatible function that supports * an options array. @@ -242,12 +249,9 @@ class ElggBatch /** * Fetches the next chunk of results * - * @param int $num_incompletes_last_fetch When called recursively, this is the number of - * incomplete entities returned in the last fetch. - * * @return bool */ - private function getNextResultsChunk($num_incompletes_last_fetch = 0) { + private function getNextResultsChunk() { // always reset results. 
$this->results = array(); @@ -281,7 +285,7 @@ class ElggBatch if ($this->incrementOffset) { $offset = $this->offset + $this->retrievedResults; } else { - $offset = $this->offset + $num_incompletes_last_fetch; + $offset = $this->offset + $this->totalIncompletes; } $current_options = array( @@ -298,6 +302,8 @@ class ElggBatch $num_results = count($this->results); $num_incomplete = count($this->incompleteEntities); + $this->totalIncompletes += $num_incomplete; + if ($this->incompleteEntities) { // pad the front of the results with nulls representing the incompletes array_splice($this->results, 0, 0, array_pad(array(), $num_incomplete, null)); @@ -318,7 +324,7 @@ class ElggBatch if ($num_results == 0) { // This fetch was *all* incompletes! We need to fetch until we can either // offer at least one row to iterate over, or give up. - return $this->getNextResultsChunk($num_incomplete); + return $this->getNextResultsChunk(); } return true; } else { diff --git a/engine/tests/api/helpers.php b/engine/tests/api/helpers.php index 06ef55138..10216140f 100644 --- a/engine/tests/api/helpers.php +++ b/engine/tests/api/helpers.php @@ -579,7 +579,7 @@ class ElggCoreHelpersTest extends ElggCoreUnitTest { } public function testElggBatchReadHandlesBrokenEntities() { - $num_test_entities = 6; + $num_test_entities = 8; $guids = array(); for ($i = $num_test_entities; $i > 0; $i--) { $entity = new ElggObject(); @@ -592,11 +592,11 @@ class ElggCoreHelpersTest extends ElggCoreUnitTest { } // break entities such that the first fetch has one incomplete - // and the second fetch has only incompletes! + // and the second and third fetches have only incompletes! 
$db_prefix = elgg_get_config('dbprefix'); delete_data(" DELETE FROM {$db_prefix}objects_entity - WHERE guid IN ({$guids[1]}, {$guids[2]}, {$guids[3]}) + WHERE guid IN ({$guids[1]}, {$guids[2]}, {$guids[3]}, {$guids[4]}, {$guids[5]}) "); $options = array( @@ -613,7 +613,7 @@ class ElggCoreHelpersTest extends ElggCoreUnitTest { } // The broken entities should not have been visited - $this->assertEqual($entities_visited, array($guids[0], $guids[4], $guids[5])); + $this->assertEqual($entities_visited, array($guids[0], $guids[6], $guids[7])); // cleanup (including leftovers from previous tests) $entity_rows = elgg_get_entities(array_merge($options, array( @@ -629,7 +629,7 @@ class ElggCoreHelpersTest extends ElggCoreUnitTest { } public function testElggBatchDeleteHandlesBrokenEntities() { - $num_test_entities = 6; + $num_test_entities = 8; $guids = array(); for ($i = $num_test_entities; $i > 0; $i--) { $entity = new ElggObject(); @@ -642,11 +642,11 @@ class ElggCoreHelpersTest extends ElggCoreUnitTest { } // break entities such that the first fetch has one incomplete - // and the second fetch has only incompletes! + // and the second and third fetches have only incompletes! 
$db_prefix = elgg_get_config('dbprefix'); delete_data(" DELETE FROM {$db_prefix}objects_entity - WHERE guid IN ({$guids[1]}, {$guids[2]}, {$guids[3]}) + WHERE guid IN ({$guids[1]}, {$guids[2]}, {$guids[3]}, {$guids[4]}, {$guids[5]}) "); $options = array( @@ -664,7 +664,7 @@ class ElggCoreHelpersTest extends ElggCoreUnitTest { } // The broken entities should not have been visited - $this->assertEqual($entities_visited, array($guids[0], $guids[4], $guids[5])); + $this->assertEqual($entities_visited, array($guids[0], $guids[6], $guids[7])); // cleanup (including leftovers from previous tests) $entity_rows = elgg_get_entities(array_merge($options, array( -- cgit v1.2.3 From 5a1a793f8634e0f7133644e09e9ace60782864d4 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Sat, 8 Jun 2013 22:12:12 -0400 Subject: Fixes #5598: Properly update metadata cache in update_metadata() --- engine/lib/metadata.php | 10 +++++----- engine/tests/api/metadata_cache.php | 7 +++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/engine/lib/metadata.php b/engine/lib/metadata.php index 43f7d5d6e..046b85124 100644 --- a/engine/lib/metadata.php +++ b/engine/lib/metadata.php @@ -191,19 +191,19 @@ function update_metadata($id, $name, $value, $value_type, $owner_guid, $access_i } // Add the metastring - $value = add_metastring($value); - if (!$value) { + $value_id = add_metastring($value); + if (!$value_id) { return false; } - $name = add_metastring($name); - if (!$name) { + $name_id = add_metastring($name); + if (!$name_id) { return false; } // If ok then add it $query = "UPDATE {$CONFIG->dbprefix}metadata" - . " set name_id='$name', value_id='$value', value_type='$value_type', access_id=$access_id," + . " set name_id='$name_id', value_id='$value_id', value_type='$value_type', access_id=$access_id," . 
" owner_guid=$owner_guid where id=$id"; $result = update_data($query); diff --git a/engine/tests/api/metadata_cache.php b/engine/tests/api/metadata_cache.php index 846116a7b..7fb328169 100644 --- a/engine/tests/api/metadata_cache.php +++ b/engine/tests/api/metadata_cache.php @@ -166,4 +166,11 @@ class ElggCoreMetadataCacheTest extends ElggCoreUnitTest { $actual = $this->cache->filterMetadataHeavyEntities($guids, 6000); $this->assertIdentical($actual, $expected); } + + public function testCreateMetadataInvalidates() { + $this->obj1->foo = 1; + create_metadata($this->guid1, 'foo', 2, '', elgg_get_logged_in_user_guid(), ACCESS_FRIENDS); + + $this->assertEqual($this->obj1->foo, 2); + } } -- cgit v1.2.3 From 42726251455e87b5ee4d368c0e743057506ad60d Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Sat, 8 Jun 2013 23:29:52 -0400 Subject: Fixes elgg.session.cookie() support of Date() for expires --- js/lib/session.js | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/js/lib/session.js b/js/lib/session.js index fa3d60aa9..0fc7f5c87 100644 --- a/js/lib/session.js +++ b/js/lib/session.js @@ -47,21 +47,18 @@ elgg.session.cookie = function (name, value, options) { } cookies.push(name + '=' + value); - - if (elgg.isNumber(options.expires)) { - if (elgg.isNumber(options.expires)) { - date = new Date(); - date.setTime(date.getTime() + (options.expires * 24 * 60 * 60 * 1000)); - } else if (options.expires.toUTCString) { - date = options.expires; - } else { - valid = false; - } - - if (valid) { - cookies.push('expires=' + date.toUTCString()); - } - } + + if (elgg.isNumber(options.expires)) { + date = new Date(); + date.setTime(date.getTime() + (options.expires * 24 * 60 * 60 * 1000)); + } else if (options.expires.toUTCString) { + date = options.expires; + } + + if (date) { + cookies.push('expires=' + date.toUTCString()); + } + // CAUTION: Needed to parenthesize options.path and options.domain // in the following expressions, otherwise they 
evaluate to undefined -- cgit v1.2.3 From 39fa5af1a79f444ec31eb63d0baf299fc8bc7b7a Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Sat, 8 Jun 2013 23:31:37 -0400 Subject: If JS enabled, walled garden registration reload will not flicker --- views/default/core/walled_garden/login.php | 10 --------- views/default/js/walled_garden.php | 35 +++++++++++++++++++++++------- views/default/page/walled_garden.php | 15 ++++++++++++- 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/views/default/core/walled_garden/login.php b/views/default/core/walled_garden/login.php index 54af90f67..42b79607d 100644 --- a/views/default/core/walled_garden/login.php +++ b/views/default/core/walled_garden/login.php @@ -29,13 +29,3 @@ echo <<
    HTML; - -if (elgg_is_sticky_form('register')) { -?> - - elgg_echo('cancel'), 'class' => 'elgg-button-cancel mlm', )); -$cancel_button = trim($cancel_button); +$cancel_button = json_encode($cancel_button); if (0) { ?> + \ No newline at end of file -- cgit v1.2.3 From 731b7bdde4790d2bfd565eb2d9c847f4adedf4b4 Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Wed, 12 Jun 2013 21:09:03 -0400 Subject: Refs #5487 account related pages use walled garden now --- pages/account/forgotten_password.php | 11 ++++++++--- pages/account/login.php | 14 +++++++++++--- pages/account/register.php | 11 ++++++++--- pages/account/reset_password.php | 11 ++++++++--- 4 files changed, 35 insertions(+), 12 deletions(-) diff --git a/pages/account/forgotten_password.php b/pages/account/forgotten_password.php index bf6ef87e0..f464f98c9 100644 --- a/pages/account/forgotten_password.php +++ b/pages/account/forgotten_password.php @@ -17,6 +17,11 @@ $content .= elgg_view_form('user/requestnewpassword', array( 'class' => 'elgg-form-account', )); -$body = elgg_view_layout("one_column", array('content' => $content)); - -echo elgg_view_page($title, $body); +if (elgg_get_config('walled_garden')) { + elgg_load_css('elgg.walled_garden'); + $body = elgg_view_layout('walled_garden', array('content' => $content)); + echo elgg_view_page($title, $body, 'walled_garden'); +} else { + $body = elgg_view_layout('one_column', array('content' => $content)); + echo elgg_view_page($title, $body); +} diff --git a/pages/account/login.php b/pages/account/login.php index 14f65cc3f..6aa3752d0 100644 --- a/pages/account/login.php +++ b/pages/account/login.php @@ -15,6 +15,14 @@ if (elgg_is_logged_in()) { forward(''); } -$login_box = elgg_view('core/account/login_box'); -$content = elgg_view_layout('one_column', array('content' => $login_box)); -echo elgg_view_page(elgg_echo('login'), $content); +$title = elgg_echo('login'); +$content = elgg_view('core/account/login_box'); + +if (elgg_get_config('walled_garden')) { + 
elgg_load_css('elgg.walled_garden'); + $body = elgg_view_layout('walled_garden', array('content' => $content)); + echo elgg_view_page($title, $body, 'walled_garden'); +} else { + $body = elgg_view_layout('one_column', array('content' => $content)); + echo elgg_view_page($title, $body); +} diff --git a/pages/account/register.php b/pages/account/register.php index cf18a635b..2fe8b74c0 100644 --- a/pages/account/register.php +++ b/pages/account/register.php @@ -48,6 +48,11 @@ $content .= elgg_view_form('register', $form_params, $body_params); $content .= elgg_view('help/register'); -$body = elgg_view_layout("one_column", array('content' => $content)); - -echo elgg_view_page($title, $body); +if (elgg_get_config('walled_garden')) { + elgg_load_css('elgg.walled_garden'); + $body = elgg_view_layout('walled_garden', array('content' => $content)); + echo elgg_view_page($title, $body, 'walled_garden'); +} else { + $body = elgg_view_layout('one_column', array('content' => $content)); + echo elgg_view_page($title, $body); +} diff --git a/pages/account/reset_password.php b/pages/account/reset_password.php index 6515bfc5d..3ab8ccf3e 100644 --- a/pages/account/reset_password.php +++ b/pages/account/reset_password.php @@ -30,6 +30,11 @@ $form = elgg_view_form('user/passwordreset', array('class' => 'elgg-form-account $title = elgg_echo('resetpassword'); $content = elgg_view_title(elgg_echo('resetpassword')) . 
$form; -$body = elgg_view_layout('one_column', array('content' => $content)); - -echo elgg_view_page($title, $body); +if (elgg_get_config('walled_garden')) { + elgg_load_css('elgg.walled_garden'); + $body = elgg_view_layout('walled_garden', array('content' => $content)); + echo elgg_view_page($title, $body, 'walled_garden'); +} else { + $body = elgg_view_layout('one_column', array('content' => $content)); + echo elgg_view_page($title, $body); +} -- cgit v1.2.3 From 7b7e6c190f146525848d1849d2d903473f8d7682 Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Wed, 12 Jun 2013 21:09:03 -0400 Subject: Refs #5487 account related pages use walled garden now --- pages/account/forgotten_password.php | 11 ++++++++--- pages/account/login.php | 14 +++++++++++--- pages/account/register.php | 11 ++++++++--- pages/account/reset_password.php | 11 ++++++++--- 4 files changed, 35 insertions(+), 12 deletions(-) diff --git a/pages/account/forgotten_password.php b/pages/account/forgotten_password.php index bf6ef87e0..f464f98c9 100644 --- a/pages/account/forgotten_password.php +++ b/pages/account/forgotten_password.php @@ -17,6 +17,11 @@ $content .= elgg_view_form('user/requestnewpassword', array( 'class' => 'elgg-form-account', )); -$body = elgg_view_layout("one_column", array('content' => $content)); - -echo elgg_view_page($title, $body); +if (elgg_get_config('walled_garden')) { + elgg_load_css('elgg.walled_garden'); + $body = elgg_view_layout('walled_garden', array('content' => $content)); + echo elgg_view_page($title, $body, 'walled_garden'); +} else { + $body = elgg_view_layout('one_column', array('content' => $content)); + echo elgg_view_page($title, $body); +} diff --git a/pages/account/login.php b/pages/account/login.php index 14f65cc3f..6aa3752d0 100644 --- a/pages/account/login.php +++ b/pages/account/login.php @@ -15,6 +15,14 @@ if (elgg_is_logged_in()) { forward(''); } -$login_box = elgg_view('core/account/login_box'); -$content = elgg_view_layout('one_column', 
array('content' => $login_box)); -echo elgg_view_page(elgg_echo('login'), $content); +$title = elgg_echo('login'); +$content = elgg_view('core/account/login_box'); + +if (elgg_get_config('walled_garden')) { + elgg_load_css('elgg.walled_garden'); + $body = elgg_view_layout('walled_garden', array('content' => $content)); + echo elgg_view_page($title, $body, 'walled_garden'); +} else { + $body = elgg_view_layout('one_column', array('content' => $content)); + echo elgg_view_page($title, $body); +} diff --git a/pages/account/register.php b/pages/account/register.php index cf18a635b..2fe8b74c0 100644 --- a/pages/account/register.php +++ b/pages/account/register.php @@ -48,6 +48,11 @@ $content .= elgg_view_form('register', $form_params, $body_params); $content .= elgg_view('help/register'); -$body = elgg_view_layout("one_column", array('content' => $content)); - -echo elgg_view_page($title, $body); +if (elgg_get_config('walled_garden')) { + elgg_load_css('elgg.walled_garden'); + $body = elgg_view_layout('walled_garden', array('content' => $content)); + echo elgg_view_page($title, $body, 'walled_garden'); +} else { + $body = elgg_view_layout('one_column', array('content' => $content)); + echo elgg_view_page($title, $body); +} diff --git a/pages/account/reset_password.php b/pages/account/reset_password.php index 6515bfc5d..3ab8ccf3e 100644 --- a/pages/account/reset_password.php +++ b/pages/account/reset_password.php @@ -30,6 +30,11 @@ $form = elgg_view_form('user/passwordreset', array('class' => 'elgg-form-account $title = elgg_echo('resetpassword'); $content = elgg_view_title(elgg_echo('resetpassword')) . 
$form; -$body = elgg_view_layout('one_column', array('content' => $content)); - -echo elgg_view_page($title, $body); +if (elgg_get_config('walled_garden')) { + elgg_load_css('elgg.walled_garden'); + $body = elgg_view_layout('walled_garden', array('content' => $content)); + echo elgg_view_page($title, $body, 'walled_garden'); +} else { + $body = elgg_view_layout('one_column', array('content' => $content)); + echo elgg_view_page($title, $body); +} -- cgit v1.2.3 From 13a7ca453d09672c7bbc7bef84eaf4e47b646da5 Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Wed, 12 Jun 2013 21:45:06 -0400 Subject: removed unnecessary cookie and fixed empty screen if cancel after failing registration --- views/default/js/walled_garden.php | 12 +----------- views/default/page/walled_garden.php | 2 +- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/views/default/js/walled_garden.php b/views/default/js/walled_garden.php index 09a478e93..e228df507 100644 --- a/views/default/js/walled_garden.php +++ b/views/default/js/walled_garden.php @@ -5,7 +5,6 @@ * @since 1.8 */ -// note that this assumes the button view is not using single quotes $cancel_button = elgg_view('input/button', array( 'value' => elgg_echo('cancel'), 'class' => 'elgg-button-cancel mlm', @@ -52,22 +51,13 @@ elgg.walled_garden.load = function(view) { if (view == 'register' && $wg.hasClass('hidden')) { // this was a failed register, display the register form ASAP - $('#elgg-walledgarden-login').toggle(); + $('#elgg-walledgarden-login').toggle(false); $(id).toggle(); $wg.removeClass('hidden'); } else { $('#elgg-walledgarden-login').fadeToggle(); $(id).fadeToggle(); } - - if (view == 'register') { - $('.elgg-form-register').submit(function () { - // set short cookie indicating JS support - var date = new Date(); - date.setTime(date.getTime() + (60 * 1000)); - elgg.session.cookie('elgg_js_support', '1', { expires: date }); - }); - } } }); event.preventDefault(); diff --git a/views/default/page/walled_garden.php 
b/views/default/page/walled_garden.php index 87a79a690..b280cf6b2 100644 --- a/views/default/page/walled_garden.php +++ b/views/default/page/walled_garden.php @@ -7,7 +7,7 @@ $is_sticky_register = elgg_is_sticky_form('register'); $wg_body_class = 'elgg-body-walledgarden'; -if ($is_sticky_register && !empty($_COOKIE['elgg_js_support'])) { +if ($is_sticky_register) { $wg_body_class .= ' hidden'; } -- cgit v1.2.3 From 6d17bde6d059a5d0d5782719623317ec494b773b Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Thu, 13 Jun 2013 07:50:50 -0400 Subject: added some of the notes for the 1.8.16 release --- CHANGES.txt | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/CHANGES.txt b/CHANGES.txt index 9b79735b3..39a88a677 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,26 @@ +Version 1.8.16 +(June xx, 2013 from https://github.com/Elgg/Elgg/tree/1.8) + Contributing Developers: + * Brett Profitt + * Cash Costello + * Jeff Tilson + * Jerome Bakker + * Paweł Sroka + * Steve Clay + + Security Fixes: + * + + Bugfixes: + * Fixed infinite loop when deleting/disabling an entity with > 50 annotations + * Fixed deleting log tables in log rotate plugin + * Added full text index for groups if missing + * Added workaround for IE8 and jumping user avatar + * Fixed pagination for members pages + * Fixed several internal cache issues + * Plus many more bug fixes + + Version 1.8.15 (April 23, 2013 from https://github.com/Elgg/Elgg/tree/1.8) Contributing Developers: -- cgit v1.2.3 From d2b525a8c9d4926944ad4a9126dbd266dc823a9b Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Fri, 14 Jun 2013 07:29:43 -0400 Subject: Fixes #5626 adds limit for display of titles for river, breadcrumbs, and list pages --- engine/lib/navigation.php | 2 +- views/default/object/elements/summary.php | 2 +- views/default/river/elements/summary.php | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/engine/lib/navigation.php b/engine/lib/navigation.php index 
2831d418b..ab9cc05e8 100644 --- a/engine/lib/navigation.php +++ b/engine/lib/navigation.php @@ -218,7 +218,7 @@ function elgg_push_breadcrumb($title, $link = NULL) { } // avoid key collisions. - $CONFIG->breadcrumbs[] = array('title' => $title, 'link' => $link); + $CONFIG->breadcrumbs[] = array('title' => elgg_get_excerpt($title, 100), 'link' => $link); } /** diff --git a/views/default/object/elements/summary.php b/views/default/object/elements/summary.php index c0f3ad340..63ab8f816 100644 --- a/views/default/object/elements/summary.php +++ b/views/default/object/elements/summary.php @@ -27,7 +27,7 @@ if ($title_link === '') { $text = $entity->name; } $params = array( - 'text' => $text, + 'text' => elgg_get_excerpt($text, 100), 'href' => $entity->getURL(), 'is_trusted' => true, ); diff --git a/views/default/river/elements/summary.php b/views/default/river/elements/summary.php index 416bc708b..d7bde51dd 100644 --- a/views/default/river/elements/summary.php +++ b/views/default/river/elements/summary.php @@ -18,9 +18,10 @@ $subject_link = elgg_view('output/url', array( 'is_trusted' => true, )); +$object_text = $object->title ? $object->title : $object->name; $object_link = elgg_view('output/url', array( 'href' => $object->getURL(), - 'text' => $object->title ? 
$object->title : $object->name, + 'text' => elgg_get_excerpt($object_text, 100), 'class' => 'elgg-river-object', 'is_trusted' => true, )); -- cgit v1.2.3 From afb0cef118a59c86e649e0717748c299a05f2427 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Fri, 14 Jun 2013 11:28:54 -0400 Subject: elgg.session.cookie() again handles missing options.expires --- js/lib/session.js | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/js/lib/session.js b/js/lib/session.js index 0fc7f5c87..a8d52733c 100644 --- a/js/lib/session.js +++ b/js/lib/session.js @@ -14,9 +14,9 @@ elgg.provide('elgg.session'); * {string} options[domain] * {boolean} options[secure] * - * @return {string} The value of the cookie, if only name is specified + * @return {string|undefined} The value of the cookie, if only name is specified. Undefined if no value set */ -elgg.session.cookie = function (name, value, options) { +elgg.session.cookie = function(name, value, options) { var cookies = [], cookie = [], i = 0, date, valid = true; //elgg.session.cookie() @@ -48,17 +48,18 @@ elgg.session.cookie = function (name, value, options) { cookies.push(name + '=' + value); - if (elgg.isNumber(options.expires)) { - date = new Date(); - date.setTime(date.getTime() + (options.expires * 24 * 60 * 60 * 1000)); - } else if (options.expires.toUTCString) { - date = options.expires; - } + if (options.expires) { + if (elgg.isNumber(options.expires)) { + date = new Date(); + date.setTime(date.getTime() + (options.expires * 24 * 60 * 60 * 1000)); + } else if (options.expires.toUTCString) { + date = options.expires; + } - if (date) { - cookies.push('expires=' + date.toUTCString()); + if (date) { + cookies.push('expires=' + date.toUTCString()); + } } - // CAUTION: Needed to parenthesize options.path and options.domain // in the following expressions, otherwise they evaluate to undefined -- cgit v1.2.3 From e29554909dc098f163a9dc6de31c42cd749ab4ae Mon Sep 17 00:00:00 2001 From: Jerome Bakker 
Date: Mon, 17 Jun 2013 14:47:49 +0200 Subject: fixed: wrong default behaviour in can_edit_entity_metadata --- engine/lib/entities.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/engine/lib/entities.php b/engine/lib/entities.php index 072b26805..226cf5c6c 100644 --- a/engine/lib/entities.php +++ b/engine/lib/entities.php @@ -761,7 +761,7 @@ function get_entity($guid) { // @todo We need a single Memcache instance with a shared pool of namespace wrappers. This function would pull an instance from the pool. static $shared_cache; - // We could also use: if (!(int) $guid) { return FALSE }, + // We could also use: if (!(int) $guid) { return FALSE }, // but that evaluates to a false positive for $guid = TRUE. // This is a bit slower, but more thorough. if (!is_numeric($guid) || $guid === 0 || $guid === '0') { @@ -2126,7 +2126,7 @@ function can_edit_entity_metadata($entity_guid, $user_guid = 0, $metadata = null $return = null; - if ($metadata->owner_guid == 0) { + if ($metadata && ($metadata->owner_guid == 0)) { $return = true; } if (is_null($return)) { -- cgit v1.2.3 From ccdf769513fa0268c5ef6880f54dfa1a25f71da6 Mon Sep 17 00:00:00 2001 From: Paweł Sroka Date: Wed, 19 Jun 2013 19:37:10 +0200 Subject: Fixes #5647 - Corrects handling of plus sign in elgg.parse_str --- js/lib/elgglib.js | 4 ++-- js/tests/ElggLibTest.js | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/js/lib/elgglib.js b/js/lib/elgglib.js index af2c94000..cea2d0cdb 100644 --- a/js/lib/elgglib.js +++ b/js/lib/elgglib.js @@ -474,8 +474,8 @@ elgg.parse_str = function(string) { re = /([^&=]+)=?([^&]*)/g; while (result = re.exec(string)) { - key = decodeURIComponent(result[1]) - value = decodeURIComponent(result[2]) + key = decodeURIComponent(result[1].replace(/\+/g,' ')) + value = decodeURIComponent(result[2].replace(/\+/g,' ')) params[key] = value; } diff --git a/js/tests/ElggLibTest.js b/js/tests/ElggLibTest.js index 31b561923..21c6cb214 100644 --- 
a/js/tests/ElggLibTest.js +++ b/js/tests/ElggLibTest.js @@ -128,3 +128,13 @@ ElggLibTest.prototype.testParseUrl = function() { }); }; +ElggLibTest.prototype.testParseStr = function() { + + [ + ["A+%2B+B", "A + B"] + + ].forEach(function(args) { + assertEquals(args[1], elgg.parse_str(args[0])); + }); +}; + -- cgit v1.2.3 From 77773f0cf081dbb7321075f625cc4ef3d07bbe3b Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Wed, 19 Jun 2013 15:29:18 -0400 Subject: style fixes --- js/lib/elgglib.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/js/lib/elgglib.js b/js/lib/elgglib.js index cea2d0cdb..a8e187f1d 100644 --- a/js/lib/elgglib.js +++ b/js/lib/elgglib.js @@ -474,8 +474,8 @@ elgg.parse_str = function(string) { re = /([^&=]+)=?([^&]*)/g; while (result = re.exec(string)) { - key = decodeURIComponent(result[1].replace(/\+/g,' ')) - value = decodeURIComponent(result[2].replace(/\+/g,' ')) + key = decodeURIComponent(result[1].replace(/\+/g, ' ')); + value = decodeURIComponent(result[2].replace(/\+/g, ' ')); params[key] = value; } -- cgit v1.2.3 From 8464276af251c8938ecdeb558de4ccaece4f628a Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Wed, 19 Jun 2013 15:40:30 -0400 Subject: Fix ElggLibTest.prototype.testParseStr --- js/tests/ElggLibTest.js | 2 +- js/tests/README | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/js/tests/ElggLibTest.js b/js/tests/ElggLibTest.js index 21c6cb214..bd39e7fb3 100644 --- a/js/tests/ElggLibTest.js +++ b/js/tests/ElggLibTest.js @@ -131,7 +131,7 @@ ElggLibTest.prototype.testParseUrl = function() { ElggLibTest.prototype.testParseStr = function() { [ - ["A+%2B+B", "A + B"] + ["A+%2B+B=A+%2B+B", {"A + B": "A + B"}] ].forEach(function(args) { assertEquals(args[1], elgg.parse_str(args[0])); diff --git a/js/tests/README b/js/tests/README index 4f86b27c6..f43c0c89d 100644 --- a/js/tests/README +++ b/js/tests/README @@ -12,9 +12,10 @@ based debuggers. Visit its wiki at the Google Code site for more information. 
Sample Usage ============ 1. Put jar file in the base directory of Elgg - 2. Run the server: java -jar JsTestDriver-1.3.3d.jar --port 4224 + 2. Run the server: java -jar JsTestDriver-1.3.5.jar --port 4224 3. Point a web browser at http://localhost:4224 - 4. Run the tests: java -jar JsTestDriver-1.3.3d.jar --config js/tests/jsTestDriver.conf --basePath . --tests all + 4. Click "Capture this browser" + 5. Run the tests: java -jar JsTestDriver-1.3.5.jar --config js/tests/jsTestDriver.conf --basePath . --tests all Configuration Hints -- cgit v1.2.3 From 0433c6e2e1cfda644e18640867828de787cccb8e Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Wed, 19 Jun 2013 15:43:01 -0400 Subject: Add JsTestDriver jar to gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c0bba2c6c..a1c78c400 100644 --- a/.gitignore +++ b/.gitignore @@ -38,7 +38,7 @@ !/mod/uservalidationbyemail/ !/mod/zaudio/ -# ignore IDE/hidden/OS cache files +# ignore IDE/hidden/testing/OS cache files .* *~ /nbproject @@ -49,6 +49,7 @@ Session.vim tmtags Thumbs.db Desktop.ini +/JsTestDriver-*.jar # don't ignore travis config !/.travis.yml -- cgit v1.2.3 From c9d1dd21d3dcf7353570d555c21a2b6ca7ecdd00 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Sat, 22 Jun 2013 20:19:53 -0400 Subject: Fixes #5673: Properly check simplecache views list if cache file missing --- engine/handlers/cache_handler.php | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/engine/handlers/cache_handler.php b/engine/handlers/cache_handler.php index 9848d3531..36fc665bb 100644 --- a/engine/handlers/cache_handler.php +++ b/engine/handlers/cache_handler.php @@ -88,20 +88,18 @@ header("ETag: \"$etag\""); $filename = $dataroot . 'views_simplecache/' . md5($viewtype . 
$view); if (file_exists($filename)) { - $contents = file_get_contents($filename); + readfile($filename); } else { // someone trying to access a non-cached file or a race condition with cache flushing mysql_close($mysql_dblink); require_once(dirname(dirname(__FILE__)) . "/start.php"); global $CONFIG; - if (!isset($CONFIG->views->simplecache[$view])) { + if (!in_array($view, $CONFIG->views->simplecache)) { header("HTTP/1.1 404 Not Found"); exit; } elgg_set_viewtype($viewtype); - $contents = elgg_view($view); + echo elgg_view($view); } - -echo $contents; -- cgit v1.2.3 From 175c65bec4a46ee7ffa424555870b383e77bd3bf Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Tue, 25 Jun 2013 06:54:48 -0400 Subject: preparing 1.8.16 release --- CHANGES.txt | 4 ++-- version.php | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 39a88a677..187dc7e25 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,5 @@ Version 1.8.16 -(June xx, 2013 from https://github.com/Elgg/Elgg/tree/1.8) +(June 25, 2013 from https://github.com/Elgg/Elgg/tree/1.8) Contributing Developers: * Brett Profitt * Cash Costello @@ -9,7 +9,7 @@ Version 1.8.16 * Steve Clay Security Fixes: - * + * Fixed avatar removal bug (thanks to Jerome Bakker for the first report of this) Bugfixes: * Fixed infinite loop when deleting/disabling an entity with > 50 annotations diff --git a/version.php b/version.php index ac554a945..f21ea074a 100644 --- a/version.php +++ b/version.php @@ -14,4 +14,4 @@ $version = 2013051700; // Human-friendly version name -$release = '1.8.15'; +$release = '1.8.16'; -- cgit v1.2.3 From a873fa6429460ccebbfdb5b7d17f124c80a6ee5c Mon Sep 17 00:00:00 2001 From: cash Date: Fri, 5 Jul 2013 20:10:12 -0400 Subject: Fixes #5745 serve 404 pages when someone requests content of a user that does not exist --- mod/blog/start.php | 13 +++++++++++++ mod/bookmarks/pages/bookmarks/friends.php | 2 +- mod/bookmarks/pages/bookmarks/owner.php | 2 +- 
mod/file/pages/file/friends.php | 2 +- mod/file/pages/file/owner.php | 2 +- mod/groups/lib/discussion.php | 5 ++--- mod/groups/lib/groups.php | 4 ++-- mod/groups/start.php | 4 ++++ mod/pages/pages/pages/friends.php | 2 +- mod/pages/pages/pages/owner.php | 2 +- mod/thewire/pages/thewire/friends.php | 2 +- mod/thewire/pages/thewire/owner.php | 2 +- 12 files changed, 29 insertions(+), 13 deletions(-) diff --git a/mod/blog/start.php b/mod/blog/start.php index 25cd81935..91525acee 100644 --- a/mod/blog/start.php +++ b/mod/blog/start.php @@ -113,14 +113,23 @@ function blog_page_handler($page) { switch ($page_type) { case 'owner': $user = get_user_by_username($page[1]); + if (!$user) { + forward('', '404'); + } $params = blog_get_page_content_list($user->guid); break; case 'friends': $user = get_user_by_username($page[1]); + if (!$user) { + forward('', '404'); + } $params = blog_get_page_content_friends($user->guid); break; case 'archive': $user = get_user_by_username($page[1]); + if (!$user) { + forward('', '404'); + } $params = blog_get_page_content_archive($user->guid, $page[2], $page[3]); break; case 'view': @@ -139,6 +148,10 @@ function blog_page_handler($page) { $params = blog_get_page_content_edit($page_type, $page[1], $page[2]); break; case 'group': + $group = get_entity($page[1]); + if (!elgg_instanceof($group, 'group')) { + forward('', '404'); + } if ($page[2] == 'all') { $params = blog_get_page_content_list($page[1]); } else { diff --git a/mod/bookmarks/pages/bookmarks/friends.php b/mod/bookmarks/pages/bookmarks/friends.php index 15b1da098..173996346 100644 --- a/mod/bookmarks/pages/bookmarks/friends.php +++ b/mod/bookmarks/pages/bookmarks/friends.php @@ -7,7 +7,7 @@ $page_owner = elgg_get_page_owner_entity(); if (!$page_owner) { - forward('bookmarks/all'); + forward('', '404'); } elgg_push_breadcrumb($page_owner->name, "bookmarks/owner/$page_owner->username"); diff --git a/mod/bookmarks/pages/bookmarks/owner.php b/mod/bookmarks/pages/bookmarks/owner.php index 
b99730fb9..b7b907916 100644 --- a/mod/bookmarks/pages/bookmarks/owner.php +++ b/mod/bookmarks/pages/bookmarks/owner.php @@ -7,7 +7,7 @@ $page_owner = elgg_get_page_owner_entity(); if (!$page_owner) { - forward('bookmarks/all'); + forward('', '404'); } elgg_push_breadcrumb($page_owner->name); diff --git a/mod/file/pages/file/friends.php b/mod/file/pages/file/friends.php index f504bdc1f..d55c1e62b 100644 --- a/mod/file/pages/file/friends.php +++ b/mod/file/pages/file/friends.php @@ -7,7 +7,7 @@ $owner = elgg_get_page_owner_entity(); if (!$owner) { - forward('file/all'); + forward('', '404'); } elgg_push_breadcrumb(elgg_echo('file'), "file/all"); diff --git a/mod/file/pages/file/owner.php b/mod/file/pages/file/owner.php index 5ad6866d6..99cf62714 100644 --- a/mod/file/pages/file/owner.php +++ b/mod/file/pages/file/owner.php @@ -10,7 +10,7 @@ group_gatekeeper(); $owner = elgg_get_page_owner_entity(); if (!$owner) { - forward('file/all'); + forward('', '404'); } elgg_push_breadcrumb(elgg_echo('file'), "file/all"); diff --git a/mod/groups/lib/discussion.php b/mod/groups/lib/discussion.php index ab2fe4849..874e21b2d 100644 --- a/mod/groups/lib/discussion.php +++ b/mod/groups/lib/discussion.php @@ -39,9 +39,8 @@ function discussion_handle_list_page($guid) { elgg_set_page_owner_guid($guid); $group = get_entity($guid); - if (!$group) { - register_error(elgg_echo('group:notfound')); - forward(); + if (!elgg_instanceof($group, 'group')) { + forward('', '404'); } elgg_push_breadcrumb($group->name); diff --git a/mod/groups/lib/groups.php b/mod/groups/lib/groups.php index 0557d41eb..e5b047eba 100644 --- a/mod/groups/lib/groups.php +++ b/mod/groups/lib/groups.php @@ -255,8 +255,8 @@ function groups_handle_profile_page($guid) { elgg_push_context('group_profile'); $group = get_entity($guid); - if (!$group) { - forward('groups/all'); + if (!elgg_instanceof($group, 'group')) { + forward('', '404'); } elgg_push_breadcrumb($group->name); diff --git a/mod/groups/start.php 
b/mod/groups/start.php index 46ab0e636..6002a535c 100644 --- a/mod/groups/start.php +++ b/mod/groups/start.php @@ -142,6 +142,10 @@ function groups_setup_sidebar_menus() { $page_owner = elgg_get_page_owner_entity(); if (elgg_in_context('group_profile')) { + if (!elgg_instanceof($page_owner, 'group')) { + forward('', '404'); + } + if (elgg_is_logged_in() && $page_owner->canEdit() && !$page_owner->isPublicMembership()) { $url = elgg_get_site_url() . "groups/requests/{$page_owner->getGUID()}"; diff --git a/mod/pages/pages/pages/friends.php b/mod/pages/pages/pages/friends.php index 87ac631c2..cecc4053b 100644 --- a/mod/pages/pages/pages/friends.php +++ b/mod/pages/pages/pages/friends.php @@ -7,7 +7,7 @@ $owner = elgg_get_page_owner_entity(); if (!$owner) { - forward('pages/all'); + forward('', '404'); } elgg_push_breadcrumb($owner->name, "pages/owner/$owner->username"); diff --git a/mod/pages/pages/pages/owner.php b/mod/pages/pages/pages/owner.php index 48199368c..7de74a3b4 100644 --- a/mod/pages/pages/pages/owner.php +++ b/mod/pages/pages/pages/owner.php @@ -7,7 +7,7 @@ $owner = elgg_get_page_owner_entity(); if (!$owner) { - forward('pages/all'); + forward('', '404'); } // access check for closed groups diff --git a/mod/thewire/pages/thewire/friends.php b/mod/thewire/pages/thewire/friends.php index e7f5eed59..efa7e7a56 100644 --- a/mod/thewire/pages/thewire/friends.php +++ b/mod/thewire/pages/thewire/friends.php @@ -5,7 +5,7 @@ $owner = elgg_get_page_owner_entity(); if (!$owner) { - forward('thewire/all'); + forward('', '404'); } $title = elgg_echo('thewire:friends'); diff --git a/mod/thewire/pages/thewire/owner.php b/mod/thewire/pages/thewire/owner.php index d8dff401e..dc25940e1 100644 --- a/mod/thewire/pages/thewire/owner.php +++ b/mod/thewire/pages/thewire/owner.php @@ -6,7 +6,7 @@ $owner = elgg_get_page_owner_entity(); if (!$owner) { - forward('thewire/all'); + forward('', '404'); } $title = elgg_echo('thewire:user', array($owner->name)); -- cgit v1.2.3 From 
597323ce5dece2597accbf98d55c2d2fa6891f4f Mon Sep 17 00:00:00 2001 From: cash Date: Fri, 5 Jul 2013 20:44:02 -0400 Subject: Fixes #5708 search supports multiple comments on the same entity --- mod/search/search_hooks.php | 28 +++++++++++++++------- .../views/default/search/comments/entity.php | 11 +++++---- mod/search/views/rss/search/comments/entity.php | 11 +++++---- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/mod/search/search_hooks.php b/mod/search/search_hooks.php index c92003c7e..923cf0aa8 100644 --- a/mod/search/search_hooks.php +++ b/mod/search/search_hooks.php @@ -405,14 +405,19 @@ function search_comments_hook($hook, $type, $value, $params) { // don't continue if nothing there... if (!$count) { - return array ('entities' => array(), 'count' => 0); + return array('entities' => array(), 'count' => 0); } - - $order_by = search_get_order_by_sql('e', null, $params['sort'], $params['order']); + + // no full text index on metastrings table + if ($params['sort'] == 'relevance') { + $params['sort'] = 'created'; + } + + $order_by = search_get_order_by_sql('a', null, $params['sort'], $params['order']); if ($order_by) { $order_by = "ORDER BY $order_by"; } - + $q = "SELECT DISTINCT a.*, msv.string as comment FROM {$db_prefix}annotations a JOIN {$db_prefix}metastrings msn ON a.name_id = msn.id JOIN {$db_prefix}metastrings msv ON a.value_id = msv.id @@ -450,10 +455,17 @@ function search_comments_hook($hook, $type, $value, $params) { } $comment_str = search_get_highlighted_relevant_substrings($comment->comment, $query); - $entity->setVolatileData('search_match_annotation_id', $comment->id); - $entity->setVolatileData('search_matched_comment', $comment_str); - $entity->setVolatileData('search_matched_comment_owner_guid', $comment->owner_guid); - $entity->setVolatileData('search_matched_comment_time_created', $comment->time_created); + $comments_data = $entity->getVolatileData('search_comments_data'); + if (!$comments_data) { + $comments_data = array(); 
+ } + $comments_data[] = array( + 'annotation_id' => $comment->id, + 'text' => $comment_str, + 'owner_guid' => $comment->owner_guid, + 'time_created' => $comment->time_created, + ); + $entity->setVolatileData('search_comments_data', $comments_data); $entities[] = $entity; } diff --git a/mod/search/views/default/search/comments/entity.php b/mod/search/views/default/search/comments/entity.php index 005bb270c..77e950843 100644 --- a/mod/search/views/default/search/comments/entity.php +++ b/mod/search/views/default/search/comments/entity.php @@ -6,8 +6,11 @@ */ $entity = $vars['entity']; +$comments_data = $entity->getVolatileData('search_comments_data'); +$comment_data = array_shift($comments_data); +$entity->setVolatileData('search_comments_data', $comments_data); -$owner = get_entity($entity->getVolatileData('search_matched_comment_owner_guid')); +$owner = get_entity($comment_data['owner_guid']); if ($owner instanceof ElggUser) { $icon = elgg_view_entity_icon($owner, 'tiny'); @@ -38,12 +41,12 @@ if ($entity->getVolatileData('search_unavailable_entity')) { $title = elgg_echo('search:comment_on', array($title)); // @todo this should use something like $comment->getURL() - $url = $entity->getURL() . '#comment_' . $entity->getVolatileData('search_match_annotation_id'); + $url = $entity->getURL() . '#comment_' . $comment_data['annotation_id']; $title = "$title"; } -$description = $entity->getVolatileData('search_matched_comment'); -$tc = $entity->getVolatileData('search_matched_comment_time_created');; +$description = $comment_data['text']; +$tc = $comment_data['time_created']; $time = elgg_view_friendly_time($tc); $body = "

    $title

    $description"; diff --git a/mod/search/views/rss/search/comments/entity.php b/mod/search/views/rss/search/comments/entity.php index 869779f35..e47afec4a 100644 --- a/mod/search/views/rss/search/comments/entity.php +++ b/mod/search/views/rss/search/comments/entity.php @@ -6,9 +6,12 @@ */ $entity = $vars['entity']; +$comments_data = $entity->getVolatileData('search_comments_data'); +$comment_data = array_shift($comments_data); +$entity->setVolatileData('search_comments_data', $comments_data); $author_name = ''; -$comment_author_guid = $entity->getVolatileData('search_matched_comment_owner_guid'); +$comment_author_guid = $comment_data['owner_guid']; $author = get_user($comment_author_guid); if ($author) { $author_name = $author->name; @@ -34,11 +37,11 @@ if ($entity->getVolatileData('search_unavailable_entity')) { $title = elgg_echo('search:comment_on', array($title)); $title .= ' ' . elgg_echo('search:comment_by') . ' ' . $author_name; - $url = $entity->getURL() . '#annotation-' . $entity->getVolatileData('search_match_annotation_id'); + $url = $entity->getURL() . '#annotation-' . 
$comment_data['annotation_id']; } -$description = $entity->getVolatileData('search_matched_comment'); -$tc = $entity->getVolatileData('search_matched_comment_time_created');; +$description = $comment_data['text']; +$tc = $comment_data['time_created']; ?> -- cgit v1.2.3 From d64cc6180d59768b33fd097bbcc87c91839aab0d Mon Sep 17 00:00:00 2001 From: cash Date: Fri, 5 Jul 2013 20:50:31 -0400 Subject: Fixes #5699 reversing order of months on blog sidebar --- mod/blog/views/default/blog/sidebar/archives.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mod/blog/views/default/blog/sidebar/archives.php b/mod/blog/views/default/blog/sidebar/archives.php index 3d8f28ca4..5098e6e3e 100644 --- a/mod/blog/views/default/blog/sidebar/archives.php +++ b/mod/blog/views/default/blog/sidebar/archives.php @@ -14,7 +14,7 @@ if (elgg_instanceof($page_owner, 'user')) { // This is a limitation of the URL schema. if ($page_owner && $vars['page'] != 'friends') { - $dates = get_entity_dates('object', 'blog', $page_owner->getGUID()); + $dates = array_reverse(get_entity_dates('object', 'blog', $page_owner->getGUID())); if ($dates) { $title = elgg_echo('blog:archives'); -- cgit v1.2.3 From 217c51eb6bbf754570cc9347a031b72927bcc985 Mon Sep 17 00:00:00 2001 From: cash Date: Fri, 5 Jul 2013 21:03:34 -0400 Subject: Fixes #5671 fixed query for clearing admin settings on plugin --- engine/classes/ElggPlugin.php | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/engine/classes/ElggPlugin.php b/engine/classes/ElggPlugin.php index c1c46f272..3cc90f623 100644 --- a/engine/classes/ElggPlugin.php +++ b/engine/classes/ElggPlugin.php @@ -350,11 +350,14 @@ class ElggPlugin extends ElggObject { */ public function unsetAllSettings() { $db_prefix = get_config('dbprefix'); - $ps_prefix = elgg_namespace_plugin_private_setting('setting', ''); + + $us_prefix = elgg_namespace_plugin_private_setting('user_setting', '', $this->getID()); + $is_prefix = 
elgg_namespace_plugin_private_setting('internal', '', $this->getID()); $q = "DELETE FROM {$db_prefix}private_settings WHERE entity_guid = $this->guid - AND name NOT LIKE '$ps_prefix%'"; + AND name NOT LIKE '$us_prefix%' + AND name NOT LIKE '$is_prefix%'"; return delete_data($q); } -- cgit v1.2.3 From c301e22224db8b064c1de68799b8e49cc1259e2e Mon Sep 17 00:00:00 2001 From: cash Date: Fri, 5 Jul 2013 21:21:00 -0400 Subject: Refs #5662 added error message for what should be impossible condition --- engine/classes/ElggPlugin.php | 2 +- engine/classes/ElggPluginPackage.php | 1 + languages/en.php | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/engine/classes/ElggPlugin.php b/engine/classes/ElggPlugin.php index 3cc90f623..7bf6eb1df 100644 --- a/engine/classes/ElggPlugin.php +++ b/engine/classes/ElggPlugin.php @@ -549,7 +549,7 @@ class ElggPlugin extends ElggObject { * Returns if the plugin is complete, meaning has all required files * and Elgg can read them and they make sense. * - * @todo bad name? This could be confused with isValid() from ElggPackage. + * @todo bad name? This could be confused with isValid() from ElggPluginPackage. 
* * @return bool */ diff --git a/engine/classes/ElggPluginPackage.php b/engine/classes/ElggPluginPackage.php index 209242288..37eb4bf4d 100644 --- a/engine/classes/ElggPluginPackage.php +++ b/engine/classes/ElggPluginPackage.php @@ -294,6 +294,7 @@ class ElggPluginPackage { return true; } + $this->errorMsg = elgg_echo('unknown_error'); return false; } diff --git a/languages/en.php b/languages/en.php index 49e366484..1721865f7 100644 --- a/languages/en.php +++ b/languages/en.php @@ -903,6 +903,7 @@ $english = array( 'total' => 'Total', 'learnmore' => "Click here to learn more.", + 'unknown_error' => 'Unknown error', 'content' => "content", 'content:latest' => 'Latest activity', -- cgit v1.2.3 From 25929426871cc09d81b0612b239e9ea025bc808c Mon Sep 17 00:00:00 2001 From: cash Date: Fri, 5 Jul 2013 21:36:20 -0400 Subject: Fixes #5613 group member list alphabetical --- mod/groups/lib/groups.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mod/groups/lib/groups.php b/mod/groups/lib/groups.php index e5b047eba..77d7c09cc 100644 --- a/mod/groups/lib/groups.php +++ b/mod/groups/lib/groups.php @@ -366,12 +366,15 @@ function groups_handle_members_page($guid) { elgg_push_breadcrumb($group->name, $group->getURL()); elgg_push_breadcrumb(elgg_echo('groups:members')); + $db_prefix = elgg_get_config('dbprefix'); $content = elgg_list_entities_from_relationship(array( 'relationship' => 'member', 'relationship_guid' => $group->guid, 'inverse_relationship' => true, 'type' => 'user', 'limit' => 20, + 'joins' => array("JOIN {$db_prefix}users_entity u ON e.guid=u.guid"), + 'order_by' => 'u.name ASC', )); $params = array( -- cgit v1.2.3 From 802b73ade2efe405fb0ba695dc37679002fd1c1c Mon Sep 17 00:00:00 2001 From: cash Date: Fri, 5 Jul 2013 21:51:33 -0400 Subject: Fixes #4872 not passing an array to view that expects HTML --- views/default/object/default.php | 1 - 1 file changed, 1 deletion(-) diff --git a/views/default/object/default.php b/views/default/object/default.php index 
110648304..70e098742 100644 --- a/views/default/object/default.php +++ b/views/default/object/default.php @@ -41,7 +41,6 @@ $params = array( 'title' => $title, 'metadata' => $metadata, 'subtitle' => $subtitle, - 'tags' => $vars['entity']->tags, ); $params = $params + $vars; $body = elgg_view('object/elements/summary', $params); -- cgit v1.2.3 From 094b8f3afc75dac6520ea58d4e014b0e7f77d62a Mon Sep 17 00:00:00 2001 From: cash Date: Fri, 5 Jul 2013 22:05:11 -0400 Subject: Fixes #4819 clearing floated images in the content area --- mod/groups/views/default/object/groupforumtopic.php | 5 ++++- views/default/object/elements/full.php | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/mod/groups/views/default/object/groupforumtopic.php b/mod/groups/views/default/object/groupforumtopic.php index 34e0ee3cc..e6988d16e 100644 --- a/mod/groups/views/default/object/groupforumtopic.php +++ b/mod/groups/views/default/object/groupforumtopic.php @@ -73,7 +73,10 @@ if ($full) { $info = elgg_view_image_block($poster_icon, $list_body); - $body = elgg_view('output/longtext', array('value' => $topic->description)); + $body = elgg_view('output/longtext', array( + 'value' => $topic->description, + 'class' => 'clearfix', + )); echo << Date: Fri, 5 Jul 2013 22:31:56 -0400 Subject: Fixes #4293 don't require both offset key and offset for listing stuff --- engine/lib/entities.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/engine/lib/entities.php b/engine/lib/entities.php index 226cf5c6c..997db79d2 100644 --- a/engine/lib/entities.php +++ b/engine/lib/entities.php @@ -1473,8 +1473,10 @@ function elgg_list_entities(array $options = array(), $getter = 'elgg_get_entiti global $autofeed; $autofeed = true; + $offset_key = isset($options['offset_key']) ? 
$options['offset_key'] : 'offset'; + $defaults = array( - 'offset' => (int) max(get_input('offset', 0), 0), + 'offset' => (int) max(get_input($offset_key, 0), 0), 'limit' => (int) max(get_input('limit', 10), 0), 'full_view' => TRUE, 'list_type_toggle' => FALSE, -- cgit v1.2.3 From 9892692deefdb06d9e7176c72fc5780ab79e3a7d Mon Sep 17 00:00:00 2001 From: Brett Profitt Date: Tue, 9 Jul 2013 12:13:17 -0400 Subject: Fixes #5706. Allowing parens in URIs if not last character. --- engine/lib/output.php | 8 ++++---- engine/tests/regression/trac_bugs.php | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/engine/lib/output.php b/engine/lib/output.php index 5adc01053..6172a5c8d 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -27,16 +27,16 @@ function parse_urls($text) { // By default htmlawed rewrites tags to this format. // if PHP supported conditional negative lookbehinds we could use this: // $r = preg_replace_callback('/(?"\'\!\(\),]+)/i', - $r = preg_replace_callback('/(?"\'\(\)]+)/i', + $r = preg_replace_callback('/(?"\']+)/i', create_function( '$matches', ' $url = $matches[1]; - $punc = \'\'; + $punc = ""; $last = substr($url, -1, 1); - if (in_array($last, array(".", "!", ","))) { + if (in_array($last, array(".", "!", ",", "(", ")"))) { $punc = $last; - $url = rtrim($url, ".!,"); + $url = rtrim($url, ".!,()"); } $urltext = str_replace("/", "/", $url); return "$urltext$punc"; diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php index 5730830bb..f173b5b9f 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -291,6 +291,9 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { 'unquoted already anchor yahoo' => 'unquoted already anchor yahoo', + + 'parens in uri http://thedailywtf.com/Articles/A-(Long-Overdue)-BuildMaster-Introduction.aspx' => + 'parens in uri http://thedailywtf.com/Articles/A-(Long-Overdue)-BuildMaster-Introduction.aspx' ); 
foreach ($cases as $input => $output) { $this->assertEqual($output, parse_urls($input)); -- cgit v1.2.3 From cf15971dc02b59e2ea36041aaec69bc1b9b21a64 Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Sat, 13 Jul 2013 08:06:54 -0400 Subject: Fixes #5800 cast user guid to an int --- engine/lib/sessions.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/engine/lib/sessions.php b/engine/lib/sessions.php index a34c2045b..fb28e1e9a 100644 --- a/engine/lib/sessions.php +++ b/engine/lib/sessions.php @@ -87,6 +87,9 @@ function elgg_is_admin_logged_in() { */ function elgg_is_admin_user($user_guid) { global $CONFIG; + + $user_guid = (int)$user_guid; + // cannot use magic metadata here because of recursion // must support the old way of getting admin from metadata -- cgit v1.2.3 From 2cab755b9df35f5e338151cf1e94cbce5f544e98 Mon Sep 17 00:00:00 2001 From: Cash Costello Date: Sat, 13 Jul 2013 08:39:58 -0400 Subject: adds warning for those using a string value with the operand "IN" for metadata pairs --- engine/lib/metadata.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/engine/lib/metadata.php b/engine/lib/metadata.php index 046b85124..d2f8d4cd4 100644 --- a/engine/lib/metadata.php +++ b/engine/lib/metadata.php @@ -402,9 +402,11 @@ function elgg_enable_metadata(array $options) { * 'operand' => '=', * 'case_sensitive' => TRUE * ) - * Currently if multiple values are sent via + * Currently if multiple values are sent via * an array (value => array('value1', 'value2') * the pair's operand will be forced to "IN". + * If passing "IN" as the operand and a string as the value, + * the value must be a properly quoted and escaped string. 
* * metadata_name_value_pairs_operator => NULL|STR The operator to use for combining * (name = value) OPERATOR (name = value); default AND -- cgit v1.2.3 From aa3b8ac53fb0f761061ca8ac713427daa5186750 Mon Sep 17 00:00:00 2001 From: Jerome Bakker Date: Tue, 16 Jul 2013 11:38:58 +0200 Subject: changed: page title now matches the document title --- pages/river.php | 1 + 1 file changed, 1 insertion(+) diff --git a/pages/river.php b/pages/river.php index 0e1511334..801d9f664 100644 --- a/pages/river.php +++ b/pages/river.php @@ -49,6 +49,7 @@ $content = elgg_view('core/river/filter', array('selector' => $selector)); $sidebar = elgg_view('core/river/sidebar'); $params = array( + 'title' => $title, 'content' => $content . $activity, 'sidebar' => $sidebar, 'filter_context' => $page_filter, -- cgit v1.2.3 From c09b6c3b91fb46c9816af3bc54a02670bcc6567a Mon Sep 17 00:00:00 2001 From: Brett Profitt Date: Thu, 18 Jul 2013 17:33:16 -0400 Subject: Fixes #5834. Wrapping input with label to prevent line break. --- views/default/input/userpicker.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/views/default/input/userpicker.php b/views/default/input/userpicker.php index 91a397e37..8b64d7df5 100644 --- a/views/default/input/userpicker.php +++ b/views/default/input/userpicker.php @@ -63,11 +63,13 @@ foreach ($vars['value'] as $user_id) { ?>
    - - +
    \ No newline at end of file + -- cgit v1.2.3 From 392e8e691066c7a7ca9ef82c24f30ef235e1df93 Mon Sep 17 00:00:00 2001 From: Brett Profitt Date: Fri, 9 Aug 2013 18:10:11 -0400 Subject: Fixes #5898. Fixed selector for finding the only friends checkbox. --- js/lib/ui.userpicker.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/js/lib/ui.userpicker.js b/js/lib/ui.userpicker.js index 7298da114..669b84cdb 100644 --- a/js/lib/ui.userpicker.js +++ b/js/lib/ui.userpicker.js @@ -107,11 +107,11 @@ elgg.userpicker.viewUser = function(info) { * @return Object */ elgg.userpicker.getSearchParams = function(obj) { - if (obj.element.siblings('[name=match_on]').attr('checked')) { + if (obj.element.parent('.elgg-user-picker').find('input[name=match_on]').attr('checked')) { return {'match_on[]': 'friends', 'term' : obj.term}; } else { return {'match_on[]': 'users', 'term' : obj.term}; } }; -elgg.register_hook_handler('init', 'system', elgg.userpicker.init); \ No newline at end of file +elgg.register_hook_handler('init', 'system', elgg.userpicker.init); -- cgit v1.2.3 From fb283378bdb2c987fc85c14aa8d644fe02996168 Mon Sep 17 00:00:00 2001 From: Sem Date: Sun, 1 Sep 2013 20:06:32 +0200 Subject: Fixed links in logbrowser table. 
--- mod/logbrowser/views/default/logbrowser/table.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mod/logbrowser/views/default/logbrowser/table.php b/mod/logbrowser/views/default/logbrowser/table.php index 1223c1456..b08a0c428 100644 --- a/mod/logbrowser/views/default/logbrowser/table.php +++ b/mod/logbrowser/views/default/logbrowser/table.php @@ -35,7 +35,7 @@ $log_entries = $vars['log_entries']; 'is_trusted' => true, )); $user_guid_link = elgg_view('output/url', array( - 'href' => "admin/overview/logbrowser?user_guid=$user->guid", + 'href' => "admin/administer_utilities/logbrowser?user_guid={$user->guid}", 'text' => $user->getGUID(), 'is_trusted' => true, )); -- cgit v1.2.3 From ec68afb872fe006f74178a679cb6b69c9ff33bc1 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Sun, 1 Sep 2013 16:19:07 -0400 Subject: Pages/Bookmarks: Match language convention for adding an object --- mod/bookmarks/languages/en.php | 2 +- mod/pages/languages/en.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mod/bookmarks/languages/en.php b/mod/bookmarks/languages/en.php index d4980280d..970b39415 100644 --- a/mod/bookmarks/languages/en.php +++ b/mod/bookmarks/languages/en.php @@ -9,7 +9,7 @@ $english = array( * Menu items and titles */ 'bookmarks' => "Bookmarks", - 'bookmarks:add' => "Add bookmark", + 'bookmarks:add' => "Add a bookmark", 'bookmarks:edit' => "Edit bookmark", 'bookmarks:owner' => "%s's bookmarks", 'bookmarks:friends' => "Friends' bookmarks", diff --git a/mod/pages/languages/en.php b/mod/pages/languages/en.php index 13b6ece2a..c204c1901 100644 --- a/mod/pages/languages/en.php +++ b/mod/pages/languages/en.php @@ -15,7 +15,7 @@ $english = array( 'pages:owner' => "%s's pages", 'pages:friends' => "Friends' pages", 'pages:all' => "All site pages", - 'pages:add' => "Add page", + 'pages:add' => "Add a page", 'pages:group' => "Group pages", 'groups:enablepages' => 'Enable group pages', -- cgit v1.2.3 From 
283106afa1fb6ff9984341b8911f90c5d4e4c4a2 Mon Sep 17 00:00:00 2001 From: Sem Date: Thu, 12 Sep 2013 00:15:52 +0200 Subject: Fixes #6052. Urldecoding usernames to allow non-alphanumeric characters. --- engine/lib/users.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/lib/users.php b/engine/lib/users.php index 9a5194896..0b4608034 100644 --- a/engine/lib/users.php +++ b/engine/lib/users.php @@ -553,7 +553,7 @@ function get_user($guid) { function get_user_by_username($username) { global $CONFIG, $USERNAME_TO_GUID_MAP_CACHE; - $username = sanitise_string($username); + $username = sanitise_string(rawurldecode($username)); $access = get_access_sql_suffix('e'); // Caching -- cgit v1.2.3 From c1ea910e3b3b0bcc27a214383c9f6355a05dd495 Mon Sep 17 00:00:00 2001 From: Paweł Sroka Date: Thu, 12 Sep 2013 05:59:18 +0200 Subject: Added function for escaping query strings and fixed several XSRF vulnerabilities. --- engine/lib/output.php | 19 +++++++++++++++++++ mod/groups/lib/groups.php | 3 ++- mod/members/pages/members/search.php | 8 ++++++-- mod/search/pages/search/index.php | 10 +--------- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/engine/lib/output.php b/engine/lib/output.php index 6172a5c8d..de4f911fb 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -420,6 +420,25 @@ function _elgg_html_decode($string) { return $string; } +/** + * Prepares query string for output to prevent CSRF attacks. 
+ * + * @param string $string + * @return string + * + * @access private + */ +function _elgg_get_display_query($string) { + //encode <,>,&, quotes and characters above 127 + if (function_exists('mb_convert_encoding')) { + $display_query = mb_convert_encoding($string, 'HTML-ENTITIES', 'UTF-8'); + } else { + // if no mbstring extension, we just strip characters + $display_query = preg_replace("/[^\x01-\x7F]/", "", $string); + } + return htmlspecialchars($display_query, ENT_QUOTES, 'UTF-8', false); +} + /** * Unit tests for Output * diff --git a/mod/groups/lib/groups.php b/mod/groups/lib/groups.php index 77d7c09cc..aa8766e06 100644 --- a/mod/groups/lib/groups.php +++ b/mod/groups/lib/groups.php @@ -73,7 +73,8 @@ function groups_search_page() { elgg_push_breadcrumb(elgg_echo('search')); $tag = get_input("tag"); - $title = elgg_echo('groups:search:title', array($tag)); + $display_query = _elgg_get_display_query($tag); + $title = elgg_echo('groups:search:title', array($display_query)); // groups plugin saves tags as "interests" - see groups_fields_setup() in start.php $params = array( diff --git a/mod/members/pages/members/search.php b/mod/members/pages/members/search.php index 1f0444d67..5466a8246 100644 --- a/mod/members/pages/members/search.php +++ b/mod/members/pages/members/search.php @@ -7,7 +7,9 @@ if ($vars['search_type'] == 'tag') { $tag = get_input('tag'); - $title = elgg_echo('members:title:searchtag', array($tag)); + $display_query = _elgg_get_display_query($tag); + + $title = elgg_echo('members:title:searchtag', array($display_query)); $options = array(); $options['query'] = $tag; @@ -28,7 +30,9 @@ if ($vars['search_type'] == 'tag') { } else { $name = sanitize_string(get_input('name')); - $title = elgg_echo('members:title:searchname', array($name)); + $display_query = _elgg_get_display_query($name); + + $title = elgg_echo('members:title:searchname', array($display_query)); $db_prefix = elgg_get_config('dbprefix'); $params = array( diff --git 
a/mod/search/pages/search/index.php b/mod/search/pages/search/index.php index ede09329b..9542e0751 100644 --- a/mod/search/pages/search/index.php +++ b/mod/search/pages/search/index.php @@ -17,15 +17,7 @@ $search_type = get_input('search_type', 'all'); // XSS protection is more important that searching for HTML. $query = stripslashes(get_input('q', get_input('tag', ''))); -// @todo - create function for sanitization of strings for display in 1.8 -// encode <,>,&, quotes and characters above 127 -if (function_exists('mb_convert_encoding')) { - $display_query = mb_convert_encoding($query, 'HTML-ENTITIES', 'UTF-8'); -} else { - // if no mbstring extension, we just strip characters - $display_query = preg_replace("/[^\x01-\x7F]/", "", $query); -} -$display_query = htmlspecialchars($display_query, ENT_QUOTES, 'UTF-8', false); +$display_query = _elgg_get_display_query($query); // check that we have an actual query if (!$query) { -- cgit v1.2.3 From 0f448571e3618a2a4ef56e377ff26c6278585b48 Mon Sep 17 00:00:00 2001 From: Matt Beckett Date: Thu, 12 Sep 2013 22:10:51 -0700 Subject: collections pages are only for logged in users --- engine/lib/users.php | 1 + 1 file changed, 1 insertion(+) diff --git a/engine/lib/users.php b/engine/lib/users.php index 9a5194896..e26b6cd4b 100644 --- a/engine/lib/users.php +++ b/engine/lib/users.php @@ -1091,6 +1091,7 @@ function friends_page_handler($segments, $handler) { * @access private */ function collections_page_handler($page_elements) { + gatekeeper(); elgg_set_context('friends'); $base = elgg_get_config('path'); if (isset($page_elements[0])) { -- cgit v1.2.3 From 25cdb9287c734dbe7fb29704f39537dd1a1a0cc0 Mon Sep 17 00:00:00 2001 From: Matt Beckett Date: Thu, 12 Sep 2013 22:13:45 -0700 Subject: spacing --- engine/lib/users.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/lib/users.php b/engine/lib/users.php index e26b6cd4b..a3813e6a8 100644 --- a/engine/lib/users.php +++ b/engine/lib/users.php @@ -1091,7 
+1091,7 @@ function friends_page_handler($segments, $handler) { * @access private */ function collections_page_handler($page_elements) { - gatekeeper(); + gatekeeper(); elgg_set_context('friends'); $base = elgg_get_config('path'); if (isset($page_elements[0])) { -- cgit v1.2.3 From e02239465b538d0364043e4e8613279eb04408f8 Mon Sep 17 00:00:00 2001 From: Matt Beckett Date: Thu, 12 Sep 2013 22:20:34 -0700 Subject: stop page propagation if there is no page or container --- mod/pages/pages/pages/history.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mod/pages/pages/pages/history.php b/mod/pages/pages/pages/history.php index 872596179..7f5fa4f4f 100644 --- a/mod/pages/pages/pages/history.php +++ b/mod/pages/pages/pages/history.php @@ -9,12 +9,12 @@ $page_guid = get_input('guid'); $page = get_entity($page_guid); if (!$page) { - + forward('', '404'); } $container = $page->getContainerEntity(); if (!$container) { - + forward('', '404'); } elgg_set_page_owner_guid($container->getGUID()); -- cgit v1.2.3 From 32f2a17bec4dc7e19cfdc5f2e5dd55b37732e910 Mon Sep 17 00:00:00 2001 From: Jeroen Dalsem Date: Fri, 20 Sep 2013 10:36:27 +0200 Subject: detect pagination needs --- engine/lib/river.php | 10 ++++++++-- engine/lib/views.php | 20 +++++++++++++++----- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/engine/lib/river.php b/engine/lib/river.php index 4926a85c4..e92040eb7 100644 --- a/engine/lib/river.php +++ b/engine/lib/river.php @@ -120,7 +120,7 @@ $posted = 0, $annotation_id = 0) { * subtypes => STR|ARR Entity subtype string(s) * type_subtype_pairs => ARR Array of type => subtype pairs where subtype * can be an array of subtype strings - * + * * posted_time_lower => INT The lower bound on the time posted * posted_time_upper => INT The upper bound on the time posted * @@ -434,8 +434,13 @@ function elgg_list_river(array $options = array()) { 'pagination' => TRUE, 'list_class' => 'elgg-list-river elgg-river', // @todo remove elgg-river in 
Elgg 1.9 ); - + $options = array_merge($defaults, $options); + + if (!$options["limit"] && !$options["offset"]) { + // no need for pagination if listing is unlimited + $options["pagination"] = false; + } $options['count'] = TRUE; $count = elgg_get_river($options); @@ -445,6 +450,7 @@ function elgg_list_river(array $options = array()) { $options['count'] = $count; $options['items'] = $items; + return elgg_view('page/components/list', $options); } diff --git a/engine/lib/views.php b/engine/lib/views.php index 65ba20204..7f179f572 100644 --- a/engine/lib/views.php +++ b/engine/lib/views.php @@ -218,7 +218,7 @@ function elgg_register_ajax_view($view) { /** * Unregister a view for ajax calls - * + * * @param string $view The view name * @return void * @since 1.8.3 @@ -992,6 +992,11 @@ function elgg_view_annotation(ElggAnnotation $annotation, array $vars = array(), function elgg_view_entity_list($entities, $vars = array(), $offset = 0, $limit = 10, $full_view = true, $list_type_toggle = true, $pagination = true) { + if (!$vars["limit"] && !$vars["offset"]) { + // no need for pagination if listing is unlimited + $vars["pagination"] = false; + } + if (!is_int($offset)) { $offset = (int)get_input('offset', 0); } @@ -1064,8 +1069,13 @@ function elgg_view_annotation_list($annotations, array $vars = array()) { 'full_view' => true, 'offset_key' => 'annoff', ); - + $vars = array_merge($defaults, $vars); + + if (!$vars["limit"] && !$vars["offset"]) { + // no need for pagination if listing is unlimited + $vars["pagination"] = false; + } return elgg_view('page/components/list', $vars); } @@ -1334,12 +1344,12 @@ function elgg_view_list_item($item, array $vars = array()) { /** * View one of the elgg sprite icons - * + * * Shorthand for - * + * * @param string $name The specific icon to display * @param string $class Additional class: float, float-alt, or custom class - * + * * @return string The html for displaying an icon */ function elgg_view_icon($name, $class = '') { -- cgit 
v1.2.3 From ee2b6351f5a759b6e713d3992c3b0c348850fecf Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Fri, 20 Sep 2013 21:02:30 -0400 Subject: Adds comment to explain URL decoding in get_user_by_username --- engine/lib/users.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/engine/lib/users.php b/engine/lib/users.php index 0b4608034..bccfb8b03 100644 --- a/engine/lib/users.php +++ b/engine/lib/users.php @@ -553,7 +553,12 @@ function get_user($guid) { function get_user_by_username($username) { global $CONFIG, $USERNAME_TO_GUID_MAP_CACHE; - $username = sanitise_string(rawurldecode($username)); + // Fixes #6052. Username is frequently sniffed from the path info, which, + // unlike $_GET, is not URL decoded. If the username was not URL encoded, + // this is harmless. + $username = rawurldecode($username); + + $username = sanitise_string($username); $access = get_access_sql_suffix('e'); // Caching -- cgit v1.2.3 From 49ab3a17173aedb8b5e3a2a228cc6cfd0a510e49 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Fri, 20 Sep 2013 21:19:06 -0400 Subject: Test that get_user_by_username accepts URL encoded input --- engine/tests/objects/users.php | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/engine/tests/objects/users.php b/engine/tests/objects/users.php index dc9129326..7d2ef6961 100644 --- a/engine/tests/objects/users.php +++ b/engine/tests/objects/users.php @@ -159,6 +159,22 @@ class ElggCoreUserTest extends ElggCoreUnitTest { $this->assertFalse($user); } + public function testGetUserByUsernameAcceptsUrlEncoded() { + $username = (string)time(); + $this->user->username = $username; + $guid = $this->user->save(); + + // percent encode first letter + $first_letter = $username[0]; + $first_letter = str_pad('%' . dechex(ord($first_letter)), 2, '0', STR_PAD_LEFT); + $username = $first_letter . 
substr($username, 1); + + $user = get_user_by_username($username); + $this->assertTrue((bool) $user); + $this->assertEqual($guid, $user->guid); + + $this->user->delete(); + } public function testElggUserMakeAdmin() { global $CONFIG; -- cgit v1.2.3 From 1150f831637c87322fcc85cb59758805d48c90e0 Mon Sep 17 00:00:00 2001 From: Matt Beckett Date: Thu, 26 Sep 2013 13:36:17 -0700 Subject: Fix redeclaring $size variable - breaks user icon if filesize == 0 or the file somehow no longer exists --- mod/profile/icondirect.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mod/profile/icondirect.php b/mod/profile/icondirect.php index dbab5d31f..85221945a 100644 --- a/mod/profile/icondirect.php +++ b/mod/profile/icondirect.php @@ -55,8 +55,8 @@ if ($mysql_dblink) { $user_path = date('Y/m/d/', $join_date) . $guid; $filename = "$data_root$user_path/profile/{$guid}{$size}.jpg"; - $size = @filesize($filename); - if ($size) { + $filesize = @filesize($filename); + if ($filesize) { header("Content-type: image/jpeg"); header('Expires: ' . gmdate('D, d M Y H:i:s \G\M\T', strtotime("+6 months")), true); header("Pragma: public"); -- cgit v1.2.3 From 12dbe2faa03281cc4c1c30d0f9635620676ac49a Mon Sep 17 00:00:00 2001 From: Matt Beckett Date: Thu, 26 Sep 2013 14:13:23 -0700 Subject: change variable name for content length --- mod/profile/icondirect.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mod/profile/icondirect.php b/mod/profile/icondirect.php index 85221945a..5f1599e0d 100644 --- a/mod/profile/icondirect.php +++ b/mod/profile/icondirect.php @@ -61,7 +61,7 @@ if ($mysql_dblink) { header('Expires: ' . 
gmdate('D, d M Y H:i:s \G\M\T', strtotime("+6 months")), true); header("Pragma: public"); header("Cache-Control: public"); - header("Content-Length: $size"); + header("Content-Length: $filesize"); header("ETag: \"$etag\""); readfile($filename); exit; -- cgit v1.2.3 From ceb9829595b62330ec0d6903287c498e6d6ee37c Mon Sep 17 00:00:00 2001 From: Jerome Bakker Date: Mon, 30 Sep 2013 10:34:22 +0200 Subject: fixes #6100 auth_gettoken now works with email address --- engine/lib/web_services.php | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/engine/lib/web_services.php b/engine/lib/web_services.php index b440e3afb..51cad6f39 100644 --- a/engine/lib/web_services.php +++ b/engine/lib/web_services.php @@ -1166,6 +1166,17 @@ function list_all_apis() { * @access private */ function auth_gettoken($username, $password) { + // check if username is an email address + if (is_email_address($username)) { + $users = get_user_by_email($username); + + // check if we have a unique user + if (is_array($users) && (count($users) == 1)) { + $username = $users[0]->username; + } + } + + // validate username and password if (true === elgg_authenticate($username, $password)) { $token = create_user_token($username); if ($token) { @@ -1195,7 +1206,7 @@ $ERRORS = array(); * * @return void * @access private - * + * * @throws Exception */ function _php_api_error_handler($errno, $errmsg, $filename, $linenum, $vars) { -- cgit v1.2.3 From 088eb2ce72bfe3852a19f4387e28fee8bdba69c7 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Tue, 8 Oct 2013 08:46:01 -0400 Subject: Fix inline docs for set_input --- engine/lib/input.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/engine/lib/input.php b/engine/lib/input.php index 2d9bae4dd..80b0b8766 100644 --- a/engine/lib/input.php +++ b/engine/lib/input.php @@ -60,8 +60,8 @@ function get_input($variable, $default = NULL, $filter_result = TRUE) { * * Note: this function does not handle nested arrays (ex: form input 
of param[m][n]) * - * @param string $variable The name of the variable - * @param string $value The value of the variable + * @param string $variable The name of the variable + * @param string|string[] $value The value of the variable * * @return void */ -- cgit v1.2.3 From b3cf5a302d25b06421a055f280ca4f654bd8e6a7 Mon Sep 17 00:00:00 2001 From: beck24 Date: Sun, 13 Oct 2013 00:03:11 -0700 Subject: Fixes #6177 - refuse to deactive plugins used as dependencies --- engine/lib/plugins.php | 35 +++++++++++++++++++++++++++++++++++ languages/en.php | 2 ++ 2 files changed, 37 insertions(+) diff --git a/engine/lib/plugins.php b/engine/lib/plugins.php index 74bce45fd..f0d89e92d 100644 --- a/engine/lib/plugins.php +++ b/engine/lib/plugins.php @@ -1104,6 +1104,39 @@ function plugins_test($hook, $type, $value, $params) { return $value; } +function plugins_deactivate_dependency_check($event, $type, $params) { + $plugin_id = $params['plugin_entity']->getManifest()->getPluginID(); + $plugin_name = $params['plugin_entity']->getManifest()->getName(); + + $active_plugins = elgg_get_plugins(); + + $dependents = array(); + foreach ($active_plugins as $plugin) { + $manifest = $plugin->getManifest(); + $requires = $manifest->getRequires(); + + foreach ($requires as $required) { + if ($required['type'] == 'plugin' && $required['name'] == $plugin_id) { + // there are active dependents + $dependents[$manifest->getPluginID()] = $plugin; + } + } + } + + if ($dependents) { + $list = '
<ul>'; + // construct error message and prevent disabling + foreach ($dependents as $dependent) { + $list .= '<li>' . $dependent->getManifest()->getName() . '</li>'; + } + $list .= '</ul>
    '; + + register_error(elgg_echo('ElggPlugin:Dependencies:ActiveDependent', array($plugin_name, $list))); + + return false; + } +} + /** * Initialize the plugin system * Listens to system init and registers actions @@ -1115,6 +1148,8 @@ function plugin_init() { run_function_once("plugin_run_once"); elgg_register_plugin_hook_handler('unit_test', 'system', 'plugins_test'); + + elgg_register_event_handler('deactivate', 'plugin', 'plugins_deactivate_dependency_check'); elgg_register_action("plugins/settings/save", '', 'admin'); elgg_register_action("plugins/usersettings/save"); diff --git a/languages/en.php b/languages/en.php index 1721865f7..ad4831db7 100644 --- a/languages/en.php +++ b/languages/en.php @@ -105,6 +105,8 @@ $english = array( 'ElggPlugin:Dependencies:Priority:Before' => 'Before %s', 'ElggPlugin:Dependencies:Priority:Uninstalled' => '%s is not installed', 'ElggPlugin:Dependencies:Suggests:Unsatisfied' => 'Missing', + + 'ElggPlugin:Dependencies:ActiveDependent' => 'There are other plugins that list %s as a dependency. 
You must disable the following plugins before disabling this one: %s', 'ElggPlugin:InvalidAndDeactivated' => '%s is an invalid plugin and has been deactivated.', -- cgit v1.2.3 From 634216f0978d037fb84ef8e68e4e4272752c22fb Mon Sep 17 00:00:00 2001 From: beck24 Date: Sun, 13 Oct 2013 00:19:46 -0700 Subject: whitespace fix --- engine/lib/plugins.php | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/engine/lib/plugins.php b/engine/lib/plugins.php index f0d89e92d..e0aa705bb 100644 --- a/engine/lib/plugins.php +++ b/engine/lib/plugins.php @@ -1105,13 +1105,13 @@ function plugins_test($hook, $type, $value, $params) { } function plugins_deactivate_dependency_check($event, $type, $params) { - $plugin_id = $params['plugin_entity']->getManifest()->getPluginID(); - $plugin_name = $params['plugin_entity']->getManifest()->getName(); - - $active_plugins = elgg_get_plugins(); + $plugin_id = $params['plugin_entity']->getManifest()->getPluginID(); + $plugin_name = $params['plugin_entity']->getManifest()->getName(); + + $active_plugins = elgg_get_plugins(); $dependents = array(); - foreach ($active_plugins as $plugin) { + foreach ($active_plugins as $plugin) { $manifest = $plugin->getManifest(); $requires = $manifest->getRequires(); @@ -1121,9 +1121,9 @@ function plugins_deactivate_dependency_check($event, $type, $params) { $dependents[$manifest->getPluginID()] = $plugin; } } - } - - if ($dependents) { + } + + if ($dependents) { $list = '
      '; // construct error message and prevent disabling foreach ($dependents as $dependent) { -- cgit v1.2.3 From 6da43b70ca0de807c0532adb0bba65405d3ffbc1 Mon Sep 17 00:00:00 2001 From: beck24 Date: Sun, 13 Oct 2013 21:51:02 -0700 Subject: move deactivate event registration to the user-triggered action --- actions/admin/plugins/deactivate.php | 3 +++ engine/lib/plugins.php | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/actions/admin/plugins/deactivate.php b/actions/admin/plugins/deactivate.php index 354f4717d..adb86dd7a 100644 --- a/actions/admin/plugins/deactivate.php +++ b/actions/admin/plugins/deactivate.php @@ -10,6 +10,9 @@ * @package Elgg.Core * @subpackage Administration.Plugins */ + + // prevent disabling plugins with active dependents + elgg_register_event_handler('deactivate', 'plugin', 'plugins_deactivate_dependency_check'); $plugin_guids = get_input('plugin_guids'); diff --git a/engine/lib/plugins.php b/engine/lib/plugins.php index e0aa705bb..1b7ad5db9 100644 --- a/engine/lib/plugins.php +++ b/engine/lib/plugins.php @@ -1148,8 +1148,6 @@ function plugin_init() { run_function_once("plugin_run_once"); elgg_register_plugin_hook_handler('unit_test', 'system', 'plugins_test'); - - elgg_register_event_handler('deactivate', 'plugin', 'plugins_deactivate_dependency_check'); elgg_register_action("plugins/settings/save", '', 'admin'); elgg_register_action("plugins/usersettings/save"); -- cgit v1.2.3 From 0f18b3b09e2d033f0523736a15ba200b985ee9ac Mon Sep 17 00:00:00 2001 From: Jeroen Dalsem Date: Mon, 21 Oct 2013 08:36:22 +0200 Subject: forward to all if no group archive requested --- mod/blog/start.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mod/blog/start.php b/mod/blog/start.php index 91525acee..e724b91c2 100644 --- a/mod/blog/start.php +++ b/mod/blog/start.php @@ -152,7 +152,7 @@ function blog_page_handler($page) { if (!elgg_instanceof($group, 'group')) { forward('', '404'); } - if ($page[2] == 'all') { + if 
(!isset($page[2]) || $page[2] == 'all') { $params = blog_get_page_content_list($page[1]); } else { $params = blog_get_page_content_archive($page[1], $page[3], $page[4]); -- cgit v1.2.3 From 5a3095d0664dc8c1fa81c7c2e6fac6a8dc8f0eaf Mon Sep 17 00:00:00 2001 From: Jerome Bakker Date: Tue, 22 Oct 2013 14:35:38 +0200 Subject: fixed a unittest I broke in #398 --- engine/tests/api/entity_getter_functions.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/tests/api/entity_getter_functions.php b/engine/tests/api/entity_getter_functions.php index 0492b1fb0..fef9dc0c5 100644 --- a/engine/tests/api/entity_getter_functions.php +++ b/engine/tests/api/entity_getter_functions.php @@ -426,7 +426,7 @@ class ElggCoreEntityGetterFunctionsTest extends ElggCoreUnitTest { $options = array( 'types' => $types, - 'subtype' => $subtype + 'subtypes' => $subtype ); $es = elgg_get_entities($options); -- cgit v1.2.3 From 0d3c2bcf71a493cb5b0923227a2b043a09762fdb Mon Sep 17 00:00:00 2001 From: Jeroen Dalsem Date: Tue, 22 Oct 2013 16:46:06 +0200 Subject: remove thumbnails if no longer needed --- mod/file/actions/file/upload.php | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/mod/file/actions/file/upload.php b/mod/file/actions/file/upload.php index 7ee402121..e20c4079f 100644 --- a/mod/file/actions/file/upload.php +++ b/mod/file/actions/file/upload.php @@ -165,6 +165,23 @@ if (isset($_FILES['upload']['name']) && !empty($_FILES['upload']['name'])) { $file->largethumb = $prefix."largethumb".$filestorename; unset($thumblarge); } + } elseif ($file->icontime) { + // if it is not an image, we do not need thumbnails + unset($file->icontime); + + $thumb = new ElggFile(); + + $thumb->setFilename($prefix . "thumb" . $filestorename); + $thumb->delete(); + unset($file->thumbnail); + + $thumb->setFilename($prefix . "smallthumb" . $filestorename); + $thumb->delete(); + unset($file->smallthumb); + + $thumb->setFilename($prefix . "largethumb" . 
$filestorename); + $thumb->delete(); + unset($file->largethumb); } } else { // not saving a file but still need to save the entity to push attributes to database @@ -202,4 +219,4 @@ if ($new_file) { } forward($file->getURL()); -} +} -- cgit v1.2.3 From ad4473263c578918dfe93e6fbf7debc70727e486 Mon Sep 17 00:00:00 2001 From: Jerome Bakker Date: Wed, 23 Oct 2013 14:56:20 +0200 Subject: changed the bugtracker url from the developers plugin to GitHub --- mod/developers/manifest.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mod/developers/manifest.xml b/mod/developers/manifest.xml index e31998872..23e726e2b 100644 --- a/mod/developers/manifest.xml +++ b/mod/developers/manifest.xml @@ -8,7 +8,7 @@ Developer tools for Elgg A set of tools for writing plugins and themes. It is recommended that you have this plugin at the top of the plugin list. http://www.elgg.org/ - http://trac.elgg.org + https://github.com/Elgg/Elgg/issues See COPYRIGHT.txt GNU General Public License version 2 -- cgit v1.2.3 From 577201be6cff3dbe2a0ee6e35c9ab33a74a55f77 Mon Sep 17 00:00:00 2001 From: Jeroen Dalsem Date: Wed, 23 Oct 2013 15:11:11 +0200 Subject: fixed issue when reordering plugins to second last position of other column that has more than 2 widgets --- engine/classes/ElggWidget.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/engine/classes/ElggWidget.php b/engine/classes/ElggWidget.php index c123e5032..66191bf47 100644 --- a/engine/classes/ElggWidget.php +++ b/engine/classes/ElggWidget.php @@ -146,10 +146,15 @@ class ElggWidget extends ElggObject { } } + $bottom_rank = count($widgets); + if ($column == $this->column) { + $bottom_rank--; + } + if ($rank == 0) { // top of the column $this->order = reset($widgets)->order - 10; - } elseif ($rank == (count($widgets) - 1)) { + } elseif ($rank == $bottom_rank) { // bottom of the column of active widgets $this->order = end($widgets)->order + 10; } else { -- cgit v1.2.3 From 
424eff09557bf5e0cee7f0c1a717b3992d2e82ac Mon Sep 17 00:00:00 2001 From: Jerome Bakker Date: Wed, 23 Oct 2013 15:35:05 +0200 Subject: replaced all references to trac.elgg.org to the correct GitHub issues --- documentation/info/manifest.xml | 2 +- engine/classes/ElggAttributeLoader.php | 12 ++++++------ engine/classes/ElggEntity.php | 12 ++++++------ engine/lib/database.php | 12 ++++++------ engine/lib/entities.php | 2 +- engine/lib/upgrade.php | 2 +- engine/lib/upgrades/2010033101.php | 2 +- ...012041801-1.8.3-multiple_user_tokens-852225f7fd89f6c5.php | 2 +- engine/lib/views.php | 2 +- engine/tests/api/helpers.php | 2 +- engine/tests/api/metadata.php | 2 +- engine/tests/api/plugins.php | 4 ++-- engine/tests/objects/entities.php | 2 +- engine/tests/objects/objects.php | 2 +- engine/tests/regression/trac_bugs.php | 10 +++++----- engine/tests/test_files/plugin_18/manifest.xml | 2 +- mod/search/README.txt | 2 +- upgrade.php | 2 +- views/default/js/elgg.php | 2 +- 19 files changed, 39 insertions(+), 39 deletions(-) diff --git a/documentation/info/manifest.xml b/documentation/info/manifest.xml index 494158481..4fd4be8ce 100644 --- a/documentation/info/manifest.xml +++ b/documentation/info/manifest.xml @@ -7,7 +7,7 @@ This is a longer, more interesting description of my plugin, its features, and other important information. 
http://www.elgg.org/ https://github.com/Elgg/Elgg - http://trac.elgg.org + https://github.com/Elgg/Elgg/issues http://elgg.org/supporter.php (C) Elgg 2011 GNU General Public License version 2 diff --git a/engine/classes/ElggAttributeLoader.php b/engine/classes/ElggAttributeLoader.php index 0b770da75..ffc80b02d 100644 --- a/engine/classes/ElggAttributeLoader.php +++ b/engine/classes/ElggAttributeLoader.php @@ -4,7 +4,7 @@ * Loads ElggEntity attributes from DB or validates those passed in via constructor * * @access private - * + * * @package Elgg.Core * @subpackage DataModel */ @@ -69,7 +69,7 @@ class ElggAttributeLoader { /** * Constructor - * + * * @param string $class class of object being loaded * @param string $required_type entity type this is being used to populate * @param array $initialized_attrs attributes after initializeAttributes() has been run @@ -94,7 +94,7 @@ class ElggAttributeLoader { /** * Get primary attributes missing that are missing - * + * * @param stdClass $row Database row * @return array */ @@ -104,7 +104,7 @@ class ElggAttributeLoader { /** * Get secondary attributes that are missing - * + * * @param stdClass $row Database row * @return array */ @@ -114,7 +114,7 @@ class ElggAttributeLoader { /** * Check that the type is correct - * + * * @param stdClass $row Database row * @return void * @throws InvalidClassException @@ -216,7 +216,7 @@ class ElggAttributeLoader { // Note: If there are still missing attributes, we're running on a 1.7 or earlier schema. We let // this pass so the upgrades can run. 
- // guid needs to be an int http://trac.elgg.org/ticket/4111 + // guid needs to be an int https://github.com/elgg/elgg/issues/4111 $row['guid'] = (int) $row['guid']; return $row; diff --git a/engine/classes/ElggEntity.php b/engine/classes/ElggEntity.php index dd1c7c114..a563f6fad 100644 --- a/engine/classes/ElggEntity.php +++ b/engine/classes/ElggEntity.php @@ -24,7 +24,7 @@ * * @package Elgg.Core * @subpackage DataModel.Entities - * + * * @property string $type object, user, group, or site (read-only after save) * @property string $subtype Further clarifies the nature of the entity (read-only after save) * @property int $guid The unique identifier for this entity (read only) @@ -352,8 +352,8 @@ abstract class ElggEntity extends ElggData implements 'limit' => 0 ); // @todo in 1.9 make this return false if can't add metadata - // http://trac.elgg.org/ticket/4520 - // + // https://github.com/elgg/elgg/issues/4520 + // // need to remove access restrictions right now to delete // because this is the expected behavior $ia = elgg_set_ignore_access(true); @@ -379,7 +379,7 @@ abstract class ElggEntity extends ElggData implements // unsaved entity. store in temp array // returning single entries instead of an array of 1 element is decided in // getMetaData(), just like pulling from the db. - // + // // if overwrite, delete first if (!$multiple || !isset($this->temp_metadata[$name])) { $this->temp_metadata[$name] = array(); @@ -964,7 +964,7 @@ abstract class ElggEntity extends ElggData implements * * @tip Can be overridden by registering for the permissions_check:comment, * plugin hook. 
- * + * * @param int $user_guid User guid (default is logged in user) * * @return bool @@ -1365,7 +1365,7 @@ abstract class ElggEntity extends ElggData implements $this->attributes['tables_loaded']++; } - // guid needs to be an int http://trac.elgg.org/ticket/4111 + // guid needs to be an int https://github.com/elgg/elgg/issues/4111 $this->attributes['guid'] = (int)$this->attributes['guid']; // Cache object handle diff --git a/engine/lib/database.php b/engine/lib/database.php index 37dfb8f8d..a7949788d 100644 --- a/engine/lib/database.php +++ b/engine/lib/database.php @@ -129,7 +129,7 @@ function establish_db_link($dblinkname = "readwrite") { // Set up cache if global not initialized and query cache not turned off if ((!$DB_QUERY_CACHE) && (!$db_cache_off)) { // @todo if we keep this cache in 1.9, expose the size as a config parameter - $DB_QUERY_CACHE = new ElggLRUCache(200); + $DB_QUERY_CACHE = new ElggLRUCache(200); } } @@ -399,14 +399,14 @@ function elgg_query_runner($query, $callback = null, $single = false) { // Since we want to cache results of running the callback, we need to // need to namespace the query with the callback and single result request. - // http://trac.elgg.org/ticket/4049 + // https://github.com/elgg/elgg/issues/4049 $hash = (string)$callback . (int)$single . $query; // Is cached? 
if ($DB_QUERY_CACHE) { if (isset($DB_QUERY_CACHE[$hash])) { elgg_log("DB query $query results returned from cache (hash: $hash)", 'NOTICE'); - return $DB_QUERY_CACHE[$hash]; + return $DB_QUERY_CACHE[$hash]; } } @@ -524,7 +524,7 @@ function delete_data($query) { /** * Invalidate the query cache - * + * * @access private */ function _elgg_invalidate_query_cache() { @@ -533,7 +533,7 @@ function _elgg_invalidate_query_cache() { $DB_QUERY_CACHE->clear(); elgg_log("Query cache invalidated", 'NOTICE'); } elseif ($DB_QUERY_CACHE) { - // In case someone sets the cache to an array and primes it with data + // In case someone sets the cache to an array and primes it with data $DB_QUERY_CACHE = array(); elgg_log("Query cache invalidated", 'NOTICE'); } @@ -668,7 +668,7 @@ function run_sql_script($scriptlocation) { /** * Format a query string for logging - * + * * @param string $query Query string * @return string * @access private diff --git a/engine/lib/entities.php b/engine/lib/entities.php index 997db79d2..4fcf1c657 100644 --- a/engine/lib/entities.php +++ b/engine/lib/entities.php @@ -791,7 +791,7 @@ function get_entity($guid) { if ($shared_cache) { $cached_entity = $shared_cache->load($guid); - // @todo store ACLs in memcache http://trac.elgg.org/ticket/3018#comment:3 + // @todo store ACLs in memcache https://github.com/elgg/elgg/issues/3018#issuecomment-13662617 if ($cached_entity) { // @todo use ACL and cached entity access_id to determine if user can see it return $cached_entity; diff --git a/engine/lib/upgrade.php b/engine/lib/upgrade.php index 0cc1e64dc..158ec9ec1 100644 --- a/engine/lib/upgrade.php +++ b/engine/lib/upgrade.php @@ -245,7 +245,7 @@ function version_upgrade() { // No version number? Oh snap...this is an upgrade from a clean installation < 1.7. // Run all upgrades without error reporting and hope for the best. - // See http://trac.elgg.org/elgg/ticket/1432 for more. + // See https://github.com/elgg/elgg/issues/1432 for more. 
$quiet = !$dbversion; // Note: Database upgrades are deprecated as of 1.8. Use code upgrades. See #1433 diff --git a/engine/lib/upgrades/2010033101.php b/engine/lib/upgrades/2010033101.php index 0bffee001..4779295fd 100644 --- a/engine/lib/upgrades/2010033101.php +++ b/engine/lib/upgrades/2010033101.php @@ -1,7 +1,7 @@ container_guid); diff --git a/engine/tests/api/helpers.php b/engine/tests/api/helpers.php index 10216140f..414fb4145 100644 --- a/engine/tests/api/helpers.php +++ b/engine/tests/api/helpers.php @@ -519,7 +519,7 @@ class ElggCoreHelpersTest extends ElggCoreUnitTest { $this->assertIdentical($elements_sorted_string, $test_elements); } - // see http://trac.elgg.org/ticket/4288 + // see https://github.com/elgg/elgg/issues/4288 public function testElggBatchIncOffset() { // normal increment $options = array( diff --git a/engine/tests/api/metadata.php b/engine/tests/api/metadata.php index 0862341c1..d23510c6a 100644 --- a/engine/tests/api/metadata.php +++ b/engine/tests/api/metadata.php @@ -139,7 +139,7 @@ class ElggCoreMetadataAPITest extends ElggCoreUnitTest { // Make sure metadata with multiple values is correctly deleted when re-written // by another user - // http://trac.elgg.org/ticket/2776 + // https://github.com/elgg/elgg/issues/2776 public function test_elgg_metadata_multiple_values() { $u1 = new ElggUser(); $u1->username = rand(); diff --git a/engine/tests/api/plugins.php b/engine/tests/api/plugins.php index 114f3991b..d0f111c48 100644 --- a/engine/tests/api/plugins.php +++ b/engine/tests/api/plugins.php @@ -69,7 +69,7 @@ class ElggCorePluginsAPITest extends ElggCoreUnitTest { 'description' => 'A longer, more interesting description.', 'website' => 'http://www.elgg.org/', 'repository' => 'https://github.com/Elgg/Elgg', - 'bugtracker' => 'http://trac.elgg.org', + 'bugtracker' => 'https://github.com/elgg/elgg/issues', 'donations' => 'http://elgg.org/supporter.php', 'copyright' => '(C) Elgg Foundation 2011', 'license' => 'GNU General Public License 
version 2', @@ -174,7 +174,7 @@ class ElggCorePluginsAPITest extends ElggCoreUnitTest { } public function testElggPluginManifestGetBugtracker() { - $this->assertEqual($this->manifest18->getBugTrackerURL(), 'http://trac.elgg.org'); + $this->assertEqual($this->manifest18->getBugTrackerURL(), 'https://github.com/elgg/elgg/issues'); $this->assertEqual($this->manifest17->getBugTrackerURL(), ''); } diff --git a/engine/tests/objects/entities.php b/engine/tests/objects/entities.php index 248b85c9e..bac72079e 100644 --- a/engine/tests/objects/entities.php +++ b/engine/tests/objects/entities.php @@ -271,7 +271,7 @@ class ElggCoreEntityTest extends ElggCoreUnitTest { $this->save_entity(); // test deleting incorrectly - // @link http://trac.elgg.org/ticket/2273 + // @link https://github.com/elgg/elgg/issues/2273 $this->assertNull($this->entity->deleteMetadata('impotent')); $this->assertEqual($this->entity->important, 'indeed!'); diff --git a/engine/tests/objects/objects.php b/engine/tests/objects/objects.php index 915594e0a..263ab2414 100644 --- a/engine/tests/objects/objects.php +++ b/engine/tests/objects/objects.php @@ -194,7 +194,7 @@ class ElggCoreObjectTest extends ElggCoreUnitTest { $old = elgg_set_ignore_access(true); } - // see http://trac.elgg.org/ticket/1196 + // see https://github.com/elgg/elgg/issues/1196 public function testElggEntityRecursiveDisableWhenLoggedOut() { $e1 = new ElggObject(); $e1->access_id = ACCESS_PUBLIC; diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php index f173b5b9f..9372b0855 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -201,8 +201,8 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { } /** - * http://trac.elgg.org/ticket/3210 - Don't remove -s in friendly titles - * http://trac.elgg.org/ticket/2276 - improve char encoding + * https://github.com/elgg/elgg/issues/3210 - Don't remove -s in friendly titles + * 
https://github.com/elgg/elgg/issues/2276 - improve char encoding */ public function test_friendly_title() { $cases = array( @@ -216,7 +216,7 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { => "a-a-a-a-a-a-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // separators trimmed - "-_ hello _-" + "-_ hello _-" => "hello", // accents removed, lower case, other multibyte chars are URL encoded @@ -286,7 +286,7 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { 'web archive anchor google' => 'web archive anchor google', - 'single quotes already anchor yahoo' => + 'single quotes already anchor yahoo' => 'single quotes already anchor yahoo', 'unquoted already anchor yahoo' => @@ -302,7 +302,7 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { /** * Ensure additional select columns do not end up in entity attributes. - * + * * https://github.com/Elgg/Elgg/issues/5538 */ public function test_extra_columns_dont_appear_in_attributes() { diff --git a/engine/tests/test_files/plugin_18/manifest.xml b/engine/tests/test_files/plugin_18/manifest.xml index 5d788616a..c8b407511 100644 --- a/engine/tests/test_files/plugin_18/manifest.xml +++ b/engine/tests/test_files/plugin_18/manifest.xml @@ -7,7 +7,7 @@ A longer, more interesting description. http://www.elgg.org/ https://github.com/Elgg/Elgg - http://trac.elgg.org + https://github.com/elgg/elgg/issues http://elgg.org/supporter.php (C) Elgg Foundation 2011 GNU General Public License version 2 diff --git a/mod/search/README.txt b/mod/search/README.txt index 98a002dd5..ac5930e5f 100644 --- a/mod/search/README.txt +++ b/mod/search/README.txt @@ -273,4 +273,4 @@ MySQL's fulltext engine returns *ZERO* rows if more than 50% of the rows searched match. The default search hooks for users and groups ignore subtypes. 
-See [trac ticket 1499](http://trac.elgg.org/elgg/ticket/1499) +See [GitHub issue 1499](https://github.com/elgg/elgg/issues/1499) diff --git a/upgrade.php b/upgrade.php index c5f158c61..d07b2a1da 100644 --- a/upgrade.php +++ b/upgrade.php @@ -46,7 +46,7 @@ if (get_input('upgrade') == 'upgrade') { } else { // if upgrading from < 1.8.0, check for the core view 'welcome' and bail if it's found. - // see http://trac.elgg.org/ticket/3064 + // see https://github.com/elgg/elgg/issues/3064 // we're not checking the view itself because it's likely themes will override this view. // we're only concerned with core files. $welcome = dirname(__FILE__) . '/views/default/welcome.php'; diff --git a/views/default/js/elgg.php b/views/default/js/elgg.php index 6fe03484d..c3b56e398 100644 --- a/views/default/js/elgg.php +++ b/views/default/js/elgg.php @@ -43,7 +43,7 @@ $libs = array( foreach ($libs as $file) { include("{$CONFIG->path}js/lib/$file.js"); - // putting a new line between the files to address http://trac.elgg.org/ticket/3081 + // putting a new line between the files to address https://github.com/elgg/elgg/issues/3081 echo "\n"; } -- cgit v1.2.3 From 0deb80da1e82af55bf8d7500d09b36225ddd7927 Mon Sep 17 00:00:00 2001 From: Jerome Bakker Date: Wed, 23 Oct 2013 16:03:08 +0200 Subject: found some more references to trac --- engine/lib/elgglib.php | 4 ++-- engine/lib/views.php | 4 ++-- engine/tests/objects/users.php | 2 +- engine/tests/regression/trac_bugs.php | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/engine/lib/elgglib.php b/engine/lib/elgglib.php index b5ef7e572..c95e0c28c 100644 --- a/engine/lib/elgglib.php +++ b/engine/lib/elgglib.php @@ -1350,7 +1350,7 @@ function full_url() { "" : (":" . $_SERVER["SERVER_PORT"]); // This is here to prevent XSS in poorly written browsers used by 80% of the population. 
- // {@trac [5813]} + // https://github.com/Elgg/Elgg/commit/0c947e80f512cb0a482b1864fd0a6965c8a0cd4a $quotes = array('\'', '"'); $encoded = array('%27', '%22'); @@ -2249,7 +2249,7 @@ function elgg_api_test($hook, $type, $value, $params) { * * @warning ACCESS_DEFAULT is a place holder for the input/access view. Do not * use it when saving an entity. - * + * * @var int */ define('ACCESS_DEFAULT', -1); diff --git a/engine/lib/views.php b/engine/lib/views.php index dc69395c6..fff3581cf 100644 --- a/engine/lib/views.php +++ b/engine/lib/views.php @@ -369,7 +369,7 @@ function elgg_view_exists($view, $viewtype = '', $recurse = true) { * view, $view_name plugin hook. * * @warning Any variables in $_SESSION will override passed vars - * upon name collision. See {@trac #2124}. + * upon name collision. See https://github.com/Elgg/Elgg/issues/2124 * * @param string $view The name and location of the view to use * @param array $vars Variables to pass to the view. @@ -795,7 +795,7 @@ function elgg_view_menu($menu_name, array $vars = array()) { * - bool 'full_view' Whether to show a full or condensed view. * * @tip This function can automatically appends annotations to entities if in full - * view and a handler is registered for the entity:annotate. See {@trac 964} and + * view and a handler is registered for the entity:annotate. See https://github.com/Elgg/Elgg/issues/964 and * {@link elgg_view_entity_annotations()}. 
* * @param ElggEntity $entity The entity to display diff --git a/engine/tests/objects/users.php b/engine/tests/objects/users.php index 7d2ef6961..8a1033ac4 100644 --- a/engine/tests/objects/users.php +++ b/engine/tests/objects/users.php @@ -145,7 +145,7 @@ class ElggCoreUserTest extends ElggCoreUnitTest { } public function testElggUserNameCache() { - // Trac #1305 + // issue https://github.com/elgg/elgg/issues/1305 // very unlikely a user would have this username $name = (string)time(); diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php index 9372b0855..f823825ab 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -1,7 +1,7 @@ Date: Wed, 23 Oct 2013 23:42:30 -0700 Subject: Revert "move deactivate event registration to the user-triggered action" This reverts commit 6da43b70ca0de807c0532adb0bba65405d3ffbc1. --- actions/admin/plugins/deactivate.php | 3 --- engine/lib/plugins.php | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/actions/admin/plugins/deactivate.php b/actions/admin/plugins/deactivate.php index adb86dd7a..354f4717d 100644 --- a/actions/admin/plugins/deactivate.php +++ b/actions/admin/plugins/deactivate.php @@ -10,9 +10,6 @@ * @package Elgg.Core * @subpackage Administration.Plugins */ - - // prevent disabling plugins with active dependents - elgg_register_event_handler('deactivate', 'plugin', 'plugins_deactivate_dependency_check'); $plugin_guids = get_input('plugin_guids'); diff --git a/engine/lib/plugins.php b/engine/lib/plugins.php index 1b7ad5db9..e0aa705bb 100644 --- a/engine/lib/plugins.php +++ b/engine/lib/plugins.php @@ -1148,6 +1148,8 @@ function plugin_init() { run_function_once("plugin_run_once"); elgg_register_plugin_hook_handler('unit_test', 'system', 'plugins_test'); + + elgg_register_event_handler('deactivate', 'plugin', 'plugins_deactivate_dependency_check'); elgg_register_action("plugins/settings/save", '', 'admin'); 
elgg_register_action("plugins/usersettings/save"); -- cgit v1.2.3 From 9762edd4305ab8e6523d2f8171a32688295f1c88 Mon Sep 17 00:00:00 2001 From: Matt Beckett Date: Wed, 23 Oct 2013 23:52:53 -0700 Subject: Added comments regarding deactivation due to error --- engine/lib/plugins.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/engine/lib/plugins.php b/engine/lib/plugins.php index e0aa705bb..c296346fc 100644 --- a/engine/lib/plugins.php +++ b/engine/lib/plugins.php @@ -1149,6 +1149,8 @@ function plugin_init() { elgg_register_plugin_hook_handler('unit_test', 'system', 'plugins_test'); + // note - plugins are booted by the time this handler is registered + // deactivation due to error may have already occurred elgg_register_event_handler('deactivate', 'plugin', 'plugins_deactivate_dependency_check'); elgg_register_action("plugins/settings/save", '', 'admin'); -- cgit v1.2.3 From 1e884c6abe933468ecfa035780bd3e9ff5e1ad61 Mon Sep 17 00:00:00 2001 From: Matt Beckett Date: Thu, 10 Oct 2013 13:29:50 -0700 Subject: #5952 - fix infinite loop when disabling > 50 annotations or metadata --- engine/lib/annotations.php | 6 +++++- engine/lib/metadata.php | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/engine/lib/annotations.php b/engine/lib/annotations.php index 124e67e0f..5e9b530de 100644 --- a/engine/lib/annotations.php +++ b/engine/lib/annotations.php @@ -249,9 +249,13 @@ function elgg_disable_annotations(array $options) { if (!elgg_is_valid_options_for_batch_operation($options, 'annotations')) { return false; } + + // if we can see hidden (disabled) we need to use the offset + // otherwise we risk an infinite loop if there are more than 50 + $inc_offset = access_get_show_hidden_status(); $options['metastring_type'] = 'annotations'; - return elgg_batch_metastring_based_objects($options, 'elgg_batch_disable_callback', false); + return elgg_batch_metastring_based_objects($options, 'elgg_batch_disable_callback', $inc_offset); } /** diff --git 
a/engine/lib/metadata.php b/engine/lib/metadata.php index d2f8d4cd4..fdb1b85f6 100644 --- a/engine/lib/metadata.php +++ b/engine/lib/metadata.php @@ -333,9 +333,13 @@ function elgg_disable_metadata(array $options) { } elgg_get_metadata_cache()->invalidateByOptions('disable', $options); + + // if we can see hidden (disabled) we need to use the offset + // otherwise we risk an infinite loop if there are more than 50 + $inc_offset = access_get_show_hidden_status(); $options['metastring_type'] = 'metadata'; - return elgg_batch_metastring_based_objects($options, 'elgg_batch_disable_callback', false); + return elgg_batch_metastring_based_objects($options, 'elgg_batch_disable_callback', $inc_offset); } /** -- cgit v1.2.3 From 8a79dc3904a2fda8081216fdf52be6a462db2709 Mon Sep 17 00:00:00 2001 From: Paweł Sroka Date: Sun, 27 Oct 2013 12:17:05 +0100 Subject: Refs #5952 - Added test --- engine/tests/regression/trac_bugs.php | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php index f823825ab..ef1348cf6 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -332,4 +332,45 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { $group->delete(); } + + /** + * Ensure that ElggBatch doesn't go into infinite loop when disabling annotations recursively when show hidden is enabled. + * + * https://github.com/Elgg/Elgg/issues/5952 + */ + public function test_disabling_annotations_infinite_loop() { + + //let's have some entity + $group = new ElggGroup(); + $group->name = 'test_group'; + $group->access_id = ACCESS_PUBLIC; + $this->assertTrue($group->save() !== false); + + $total = 51; + //add some annotations + for ($cnt = 0; $cnt < $total; $cnt++) { + $group->annotate('test_annotation', 'value_' . 
$total); + } + + //disable them + $show_hidden = access_get_show_hidden_status(); + access_show_hidden_entities(true); + $options = array( + 'guid' => $group->guid, + 'limit' => $total, //using strict limit to avoid real infinite loop and just see ElggBatch limiting on it before finishing the work + ); + elgg_disable_annotations($options); + access_show_hidden_entities($show_hidden); + + //confirm all being disabled + $annotations = $group->getAnnotations(array( + 'limit' => $total, + )); + foreach ($annotations as $annotation) { + $this->assertTrue($annotation->enabled == 'no'); + } + + //delete group and annotations + $group->delete(); + } } -- cgit v1.2.3 From 61d049487a4f1a72f79f8c49bb65ffa82825e378 Mon Sep 17 00:00:00 2001 From: Paweł Sroka Date: Sun, 27 Oct 2013 13:28:16 +0100 Subject: Refs #6172 - Fixed one more trac mention --- engine/lib/elgglib.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/lib/elgglib.php b/engine/lib/elgglib.php index c95e0c28c..34111c69d 100644 --- a/engine/lib/elgglib.php +++ b/engine/lib/elgglib.php @@ -746,7 +746,7 @@ function elgg_unregister_event_handler($event, $object_type, $callback) { * @tip When referring to events, the preferred syntax is "event, type". * * @internal Only rarely should events be changed, added, or removed in core. - * When making changes to events, be sure to first create a ticket in trac. + * When making changes to events, be sure to first create a ticket on Github. * * @internal @tip Think of $object_type as the primary namespace element, and * $event as the secondary namespace. 
-- cgit v1.2.3 From 33260fd7a88e5e92fbee6ee0719ab4286e9ce221 Mon Sep 17 00:00:00 2001 From: Paweł Sroka Date: Sun, 27 Oct 2013 19:27:14 +0100 Subject: Refs #6117 - Added docs + changed plugins_deactivate_dependency_check to _plugins_deactivate_dependency_check --- engine/lib/plugins.php | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/engine/lib/plugins.php b/engine/lib/plugins.php index c296346fc..d5d3db466 100644 --- a/engine/lib/plugins.php +++ b/engine/lib/plugins.php @@ -1104,7 +1104,17 @@ function plugins_test($hook, $type, $value, $params) { return $value; } -function plugins_deactivate_dependency_check($event, $type, $params) { +/** + * Checks on deactivate plugin event if disabling it won't create unmet dependencies and blocks disable in such case. + * + * @param string $event deactivate + * @param string $type plugin + * @param array $params Parameters array containing entry with ELggPlugin instance under 'plugin_entity' key + * @return bool false to block plugin deactivation action + * + * @access private + */ +function _plugins_deactivate_dependency_check($event, $type, $params) { $plugin_id = $params['plugin_entity']->getManifest()->getPluginID(); $plugin_name = $params['plugin_entity']->getManifest()->getName(); @@ -1114,7 +1124,7 @@ function plugins_deactivate_dependency_check($event, $type, $params) { foreach ($active_plugins as $plugin) { $manifest = $plugin->getManifest(); $requires = $manifest->getRequires(); - + foreach ($requires as $required) { if ($required['type'] == 'plugin' && $required['name'] == $plugin_id) { // there are active dependents @@ -1130,11 +1140,11 @@ function plugins_deactivate_dependency_check($event, $type, $params) { $list .= '
<li>' . $dependent->getManifest()->getName() . '
</li>'; } $list .= '</ul>
    '; - + register_error(elgg_echo('ElggPlugin:Dependencies:ActiveDependent', array($plugin_name, $list))); - + return false; - } + } } /** @@ -1151,7 +1161,7 @@ function plugin_init() { // note - plugins are booted by the time this handler is registered // deactivation due to error may have already occurred - elgg_register_event_handler('deactivate', 'plugin', 'plugins_deactivate_dependency_check'); + elgg_register_event_handler('deactivate', 'plugin', '_plugins_deactivate_dependency_check'); elgg_register_action("plugins/settings/save", '', 'admin'); elgg_register_action("plugins/usersettings/save"); -- cgit v1.2.3 From 5a6b259a320256c2f75828193aa8da93eebe226e Mon Sep 17 00:00:00 2001 From: Juho Jaakkola Date: Thu, 31 Oct 2013 17:01:15 +0200 Subject: Fixes #6030 - Hide group bookmarklet from non-members --- mod/bookmarks/start.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mod/bookmarks/start.php b/mod/bookmarks/start.php index a5685388b..caea43587 100644 --- a/mod/bookmarks/start.php +++ b/mod/bookmarks/start.php @@ -285,8 +285,11 @@ function bookmarks_page_menu($hook, $type, $return, $params) { if (!$page_owner) { $page_owner = elgg_get_logged_in_user_entity(); } - + if ($page_owner instanceof ElggGroup) { + if (!$page_owner->isMember()) { + return $return; + } $title = elgg_echo('bookmarks:bookmarklet:group'); } else { $title = elgg_echo('bookmarks:bookmarklet'); -- cgit v1.2.3 From d53447f7e6b3277f3249d9a70e56ec01a90c3a60 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Thu, 11 Jul 2013 13:24:01 -0400 Subject: Disable loading external entities during XML parsing --- engine/classes/ElggAutoP.php | 14 ++++++++++++++ engine/classes/ElggXMLElement.php | 8 ++++++-- engine/tests/regression/trac_bugs.php | 10 ++++++++++ engine/tests/test_files/xxe/external_entity.txt | 1 + engine/tests/test_files/xxe/request.xml | 8 ++++++++ 5 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 engine/tests/test_files/xxe/external_entity.txt 
create mode 100644 engine/tests/test_files/xxe/request.xml diff --git a/engine/classes/ElggAutoP.php b/engine/classes/ElggAutoP.php index 71536c433..05842d1b2 100644 --- a/engine/classes/ElggAutoP.php +++ b/engine/classes/ElggAutoP.php @@ -110,12 +110,19 @@ class ElggAutoP { // http://www.php.net/manual/en/domdocument.loadhtml.php#95463 libxml_use_internal_errors(true); + // Do not load entities. May be unnecessary, better safe than sorry + $disable_load_entities = libxml_disable_entity_loader(true); + if (!$this->_doc->loadHTML("{$html}" . "")) { + + libxml_disable_entity_loader($disable_load_entities); return false; } + libxml_disable_entity_loader($disable_load_entities); + $this->_xpath = new DOMXPath($this->_doc); // start processing recursively at the BODY element $nodeList = $this->_xpath->query('//body[1]'); @@ -135,9 +142,16 @@ class ElggAutoP { // re-parse so we can handle new AUTOP elements + // Do not load entities. May be unnecessary, better safe than sorry + $disable_load_entities = libxml_disable_entity_loader(true); + if (!$this->_doc->loadHTML($html)) { + libxml_disable_entity_loader($disable_load_entities); return false; } + + libxml_disable_entity_loader($disable_load_entities); + // must re-create XPath object after DOM load $this->_xpath = new DOMXPath($this->_doc); diff --git a/engine/classes/ElggXMLElement.php b/engine/classes/ElggXMLElement.php index 6f2633e25..cbd3fc5ce 100644 --- a/engine/classes/ElggXMLElement.php +++ b/engine/classes/ElggXMLElement.php @@ -20,7 +20,12 @@ class ElggXMLElement { if ($xml instanceof SimpleXMLElement) { $this->_element = $xml; } else { + // do not load entities + $disable_load_entities = libxml_disable_entity_loader(true); + $this->_element = new SimpleXMLElement($xml); + + libxml_disable_entity_loader($disable_load_entities); } } @@ -123,5 +128,4 @@ class ElggXMLElement { } return false; } - -} \ No newline at end of file +} diff --git a/engine/tests/regression/trac_bugs.php 
b/engine/tests/regression/trac_bugs.php index ef1348cf6..e6773c8af 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -373,4 +373,14 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { //delete group and annotations $group->delete(); } + + public function test_ElggXMLElement_does_not_load_external_entities() { + $payload = file_get_contents(dirname(dirname(__FILE__)) . '/test_files/xxe/request.xml'); + $payload = sprintf($payload, 'file://' . realpath(dirname(dirname(__FILE__)) . '/test_files/xxe/external_entity.txt')); + + $el = new ElggXMLElement($payload); + $chidren = $el->getChildren(); + $content = $chidren[0]->getContent(); + $this->assertNoPattern('/secret/', $content); + } } diff --git a/engine/tests/test_files/xxe/external_entity.txt b/engine/tests/test_files/xxe/external_entity.txt new file mode 100644 index 000000000..536aca34d --- /dev/null +++ b/engine/tests/test_files/xxe/external_entity.txt @@ -0,0 +1 @@ +secret \ No newline at end of file diff --git a/engine/tests/test_files/xxe/request.xml b/engine/tests/test_files/xxe/request.xml new file mode 100644 index 000000000..4390f9db2 --- /dev/null +++ b/engine/tests/test_files/xxe/request.xml @@ -0,0 +1,8 @@ + + + +]> + + test&xxe;test + -- cgit v1.2.3 From 6eec301f33ff3e618d591d429de7edf30277e972 Mon Sep 17 00:00:00 2001 From: Paweł Sroka Date: Tue, 23 Jul 2013 08:28:30 +0200 Subject: Enhanced test --- engine/tests/regression/trac_bugs.php | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php index e6773c8af..ea39253df 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -375,12 +375,26 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { } public function test_ElggXMLElement_does_not_load_external_entities() { + $elLast = libxml_disable_entity_loader(false); + $payload = 
file_get_contents(dirname(dirname(__FILE__)) . '/test_files/xxe/request.xml'); - $payload = sprintf($payload, 'file://' . realpath(dirname(dirname(__FILE__)) . '/test_files/xxe/external_entity.txt')); + $path = realpath(dirname(dirname(__FILE__)) . '/test_files/xxe/external_entity.txt'); + $path = str_replace('\\', '/', $path); + if ($path[0] != '/') { + $path = '/' . $path; + } + $path = 'file://' . $path; + $payload = sprintf($payload, $path); $el = new ElggXMLElement($payload); $chidren = $el->getChildren(); $content = $chidren[0]->getContent(); $this->assertNoPattern('/secret/', $content); + + //make sure the test is valid + $element = new SimpleXMLElement($payload); + $this->assertPattern('/secret/', (string)$element->methodName); + + libxml_disable_entity_loader($elLast); } } -- cgit v1.2.3 From 7cacdc8bc26c98a58dc8986acfd911d6542608af Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Wed, 31 Jul 2013 13:34:55 -0400 Subject: Emit notice if XXE can't be tested and skip test --- engine/tests/regression/trac_bugs.php | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/engine/tests/regression/trac_bugs.php b/engine/tests/regression/trac_bugs.php index ea39253df..689275661 100644 --- a/engine/tests/regression/trac_bugs.php +++ b/engine/tests/regression/trac_bugs.php @@ -377,6 +377,7 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { public function test_ElggXMLElement_does_not_load_external_entities() { $elLast = libxml_disable_entity_loader(false); + // build payload that should trigger loading of external entity $payload = file_get_contents(dirname(dirname(__FILE__)) . '/test_files/xxe/request.xml'); $path = realpath(dirname(dirname(__FILE__)) . '/test_files/xxe/external_entity.txt'); $path = str_replace('\\', '/', $path); @@ -384,16 +385,20 @@ class ElggCoreRegressionBugsTest extends ElggCoreUnitTest { $path = '/' . $path; } $path = 'file://' . 
$path; - $payload = sprintf($payload, $path); + $payload = sprintf($payload, $path); - $el = new ElggXMLElement($payload); - $chidren = $el->getChildren(); - $content = $chidren[0]->getContent(); - $this->assertNoPattern('/secret/', $content); - - //make sure the test is valid + // make sure we can actually this in this environment $element = new SimpleXMLElement($payload); - $this->assertPattern('/secret/', (string)$element->methodName); + $can_load_entity = preg_match('/secret/', (string)$element->methodName); + + $this->skipUnless($can_load_entity, "XXE vulnerability cannot be tested on this system"); + + if ($can_load_entity) { + $el = new ElggXMLElement($payload); + $chidren = $el->getChildren(); + $content = $chidren[0]->getContent(); + $this->assertNoPattern('/secret/', $content); + } libxml_disable_entity_loader($elLast); } -- cgit v1.2.3 From e98f933857548be9cd078416a93011ea9c2f3e3a Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Mon, 10 Jun 2013 23:16:45 -0400 Subject: Allow regenerating site secret --- actions/admin/site/regenerate_secret.php | 11 ++ engine/classes/ElggCrypto.php | 134 +++++++++++++++++++++ engine/lib/actions.php | 27 ++++- engine/lib/admin.php | 2 + ...3060900-1.8.15-site_secret-404fc165cf9e0ac9.php | 13 ++ languages/en.php | 18 ++- .../admin/settings/advanced/site_secret.php | 11 ++ views/default/css/admin.php | 20 +++ .../default/forms/admin/site/regenerate_secret.php | 24 ++++ 9 files changed, 257 insertions(+), 3 deletions(-) create mode 100644 actions/admin/site/regenerate_secret.php create mode 100644 engine/classes/ElggCrypto.php create mode 100644 engine/lib/upgrades/2013060900-1.8.15-site_secret-404fc165cf9e0ac9.php create mode 100644 views/default/admin/settings/advanced/site_secret.php create mode 100644 views/default/forms/admin/site/regenerate_secret.php diff --git a/actions/admin/site/regenerate_secret.php b/actions/admin/site/regenerate_secret.php new file mode 100644 index 000000000..3112fb5f3 --- /dev/null +++ 
b/actions/admin/site/regenerate_secret.php @@ -0,0 +1,11 @@ +='); + } + // /dev/urandom is available on many *nix systems and is considered the + // best commonly available pseudo-random source. + if ($fh = @fopen('/dev/urandom', 'rb')) { + // PHP only performs buffered reads, so in reality it will always read + // at least 4096 bytes. Thus, it costs nothing extra to read and store + // that much so as to speed any additional invocations. + $bytes .= fread($fh, max(4096, $count)); + fclose($fh); + } elseif ($php_compatible && function_exists('openssl_random_pseudo_bytes')) { + // openssl_random_pseudo_bytes() will find entropy in a system-dependent + // way. + $bytes .= openssl_random_pseudo_bytes($count - strlen($bytes)); + } + // If /dev/urandom is not available or returns no bytes, this loop will + // generate a good set of pseudo-random bytes on any system. + // Note that it may be important that our $random_state is passed + // through hash() prior to being rolled into $output, that the two hash() + // invocations are different, and that the extra input into the first one - + // the microtime() - is prepended rather than appended. This is to avoid + // directly leaking $random_state via the $output stream, which could + // allow for trivial prediction of further "random" numbers. + while (strlen($bytes) < $count) { + $random_state = hash('sha256', microtime() . mt_rand() . $random_state); + $bytes .= hash('sha256', mt_rand() . $random_state, true); + } + } + $output = substr($bytes, 0, $count); + $bytes = substr($bytes, $count); + return $output; + } + + /** + * Generate a random string of specified length. + * + * Uses supplied character list for generating the new string. + * If no character list provided - uses Base64 URL character set. + * + * @param int $length Desired length of the string + * @param string|null $chars Characters to be chosen from randomly. If not given, the Base64 URL + * charset will be used. 
+ * + * @return string The random string + * + * @throws InvalidArgumentException + * + * @copyright Copyright (c) 2005-2013 Zend Technologies USA Inc. (http://www.zend.com) + * @license http://framework.zend.com/license/new-bsd New BSD License + * + * @see https://github.com/zendframework/zf2/blob/master/library/Zend/Math/Rand.php#L179 + */ + public static function getRandomString($length, $chars = null) + { + if ($length < 1) { + throw new InvalidArgumentException('Length should be >= 1'); + } + + if (empty($chars)) { + $numBytes = ceil($length * 0.75); + $bytes = self::getRandomBytes($numBytes); + $string = substr(rtrim(base64_encode($bytes), '='), 0, $length); + + // Base64 URL + return strtr($string, '+/', '-_'); + } + + $listLen = strlen($chars); + + if ($listLen == 1) { + return str_repeat($chars, $length); + } + + $bytes = self::getRandomBytes($length); + $pos = 0; + $result = ''; + for ($i = 0; $i < $length; $i++) { + $pos = ($pos + ord($bytes[$i])) % $listLen; + $result .= $chars[$pos]; + } + + return $result; + } +} diff --git a/engine/lib/actions.php b/engine/lib/actions.php index 56936f582..8047914ac 100644 --- a/engine/lib/actions.php +++ b/engine/lib/actions.php @@ -364,16 +364,19 @@ function generate_action_token($timestamp) { } /** - * Initialise the site secret hash. + * Initialise the site secret (32 bytes: "z" to indicate format + 186-bit key in Base64 URL). * * Used during installation and saves as a datalist. * + * Note: Old secrets were hex encoded. + * * @return mixed The site secret hash or false * @access private * @todo Move to better file. */ function init_site_secret() { - $secret = md5(rand() . microtime()); + $secret = 'z' . 
ElggCrypto::getRandomString(31); + if (datalist_set('__site_secret__', $secret)) { return $secret; } @@ -399,6 +402,26 @@ function get_site_secret() { return $secret; } +/** + * Get the strength of the site secret + * + * @return string "strong", "moderate", or "weak" + * @access private + */ +function _elgg_get_site_secret_strength() { + $secret = get_site_secret(); + if ($secret[0] !== 'z') { + $rand_max = getrandmax(); + if ($rand_max < pow(2, 16)) { + return 'weak'; + } + if ($rand_max < pow(2, 32)) { + return 'moderate'; + } + } + return 'strong'; +} + /** * Check if an action is registered and its script exists. * diff --git a/engine/lib/admin.php b/engine/lib/admin.php index 7f82108c0..f36f29668 100644 --- a/engine/lib/admin.php +++ b/engine/lib/admin.php @@ -236,6 +236,7 @@ function admin_init() { elgg_register_action('admin/site/update_advanced', '', 'admin'); elgg_register_action('admin/site/flush_cache', '', 'admin'); elgg_register_action('admin/site/unlock_upgrade', '', 'admin'); + elgg_register_action('admin/site/regenerate_secret', '', 'admin'); elgg_register_action('admin/menu/save', '', 'admin'); @@ -291,6 +292,7 @@ function admin_init() { elgg_register_admin_menu_item('configure', 'settings', null, 100); elgg_register_admin_menu_item('configure', 'basic', 'settings', 10); elgg_register_admin_menu_item('configure', 'advanced', 'settings', 20); + elgg_register_admin_menu_item('configure', 'advanced/site_secret', 'settings', 25); elgg_register_admin_menu_item('configure', 'menu_items', 'appearance', 30); elgg_register_admin_menu_item('configure', 'profile_fields', 'appearance', 40); // default widgets is added via an event handler elgg_default_widgets_init() in widgets.php diff --git a/engine/lib/upgrades/2013060900-1.8.15-site_secret-404fc165cf9e0ac9.php b/engine/lib/upgrades/2013060900-1.8.15-site_secret-404fc165cf9e0ac9.php new file mode 100644 index 000000000..b5b614762 --- /dev/null +++ 
b/engine/lib/upgrades/2013060900-1.8.15-site_secret-404fc165cf9e0ac9.php @@ -0,0 +1,13 @@ + 'Settings', 'admin:settings:basic' => 'Basic Settings', 'admin:settings:advanced' => 'Advanced Settings', + 'admin:settings:advanced/site_secret' => 'Site Secret', 'admin:site:description' => "This admin panel allows you to control global settings for your site. Choose an option below to get started.", + 'admin:settings:advanced:site_secret' => 'Site Secret', 'admin:site:opt:linktext' => "Configure site...", 'admin:site:access:warning' => "Changing the access setting only affects the permissions on content created in the future.", + 'admin:site:secret:intro' => 'Elgg uses a key to create security tokens for various purposes.', + 'admin:site:secret_regenerated' => "Your site secret has been regenerated.", + 'admin:site:secret:regenerate' => "Regenerate site secret", + 'admin:site:secret:regenerate:help' => "Note: This may inconvenience some users by invalidating tokens used in \"remember me\" cookies, e-mail validation requests, invitation codes, etc.", + 'site_secret:current_strength' => 'Key Strength', + 'site_secret:strength:weak' => "Weak", + 'site_secret:strength_msg:weak' => "We strongly recommend that you regenerate your site secret.", + 'site_secret:strength:moderate' => "Moderate", + 'site_secret:strength_msg:moderate' => "We recommend you regenerate your site secret for the best site security.", + 'site_secret:strength:strong' => "Strong", + 'site_secret:strength_msg:strong' => "✓ Your site secret is sufficiently strong.", + 'admin:dashboard' => 'Dashboard', 'admin:widget:online_users' => 'Online users', 'admin:widget:online_users:help' => 'Lists the users currently on the site', @@ -1064,7 +1078,7 @@ Once you have logged in, we highly recommend that you change your password. 'upgrade:unlock' => 'Unlock upgrade', 'upgrade:unlock:confirm' => "The database is locked for another upgrade. Running concurrent upgrades is dangerous. 
You should only continue if you know there is not another upgrade running. Unlock?", 'upgrade:locked' => "Cannot upgrade. Another upgrade is running. To clear the upgrade lock, visit the Admin section.", - 'upgrade:unlock:success' => "Upgrade unlocked suscessfully.", + 'upgrade:unlock:success' => "Upgrade unlocked successfully.", 'upgrade:unable_to_upgrade' => 'Unable to upgrade.', 'upgrade:unable_to_upgrade_info' => 'This installation cannot be upgraded because legacy views @@ -1079,6 +1093,8 @@ Once you have logged in, we highly recommend that you change your password. 'update:twitter_api:deactivated' => 'Twitter API (previously Twitter Service) was deactivated during the upgrade. Please activate it manually if required.', 'update:oauth_api:deactivated' => 'OAuth API (previously OAuth Lib) was deactivated during the upgrade. Please activate it manually if required.', + 'upgrade:site_secret_warning:moderate' => "You are encouraged to regenerate your site key to improve system security. See Configure > Site Secret", + 'upgrade:site_secret_warning:weak' => "You are strongly encouraged to regenerate your site key to improve system security. 
See Configure > Site Secret", 'deprecated:function' => '%s() was deprecated by %s()', diff --git a/views/default/admin/settings/advanced/site_secret.php b/views/default/admin/settings/advanced/site_secret.php new file mode 100644 index 000000000..e70ac7ab6 --- /dev/null +++ b/views/default/admin/settings/advanced/site_secret.php @@ -0,0 +1,11 @@ + _elgg_get_site_secret_strength(), +)); diff --git a/views/default/css/admin.php b/views/default/css/admin.php index 3896ded5d..c435621b2 100644 --- a/views/default/css/admin.php +++ b/views/default/css/admin.php @@ -1543,6 +1543,26 @@ table.mceLayout { margin: 0 0 1em 2em; } +/* *************************************** + SITE SECRET +*************************************** */ +.elgg-form-admin-site-regenerate-secret table { + width: 60%; + margin: 1em auto; +} +td.elgg-strength-strong, +td.elgg-strength-strong h4 { + background: #DFF0D8; color: #468847; +} +td.elgg-strength-moderate, +td.elgg-strength-moderate h4 { + background: #FCF8E3; color: #C09853; +} +td.elgg-strength-weak, +td.elgg-strength-weak h4 { + background: #F2DEDE; color: #B94A48; +} + /* *************************************** HELPERS *************************************** */ diff --git a/views/default/forms/admin/site/regenerate_secret.php b/views/default/forms/admin/site/regenerate_secret.php new file mode 100644 index 000000000..af269b801 --- /dev/null +++ b/views/default/forms/admin/site/regenerate_secret.php @@ -0,0 +1,24 @@ + +

    + + + + + + +
    +

    +
    +
    + +
    + elgg_echo('admin:site:secret:regenerate'), + 'class' => 'elgg-requires-confirmation elgg-button elgg-button-submit', + )); ?> +

    +
    -- cgit v1.2.3 From 4bcca223409915e075dd08f0aaca9f23ea63f610 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Thu, 13 Jun 2013 14:05:33 -0400 Subject: PRNG replace Drupal's with George Argyros' --- engine/classes/ElggCrypto.php | 181 +++++++++++++++++++++++++++++------------- 1 file changed, 128 insertions(+), 53 deletions(-) diff --git a/engine/classes/ElggCrypto.php b/engine/classes/ElggCrypto.php index 364af4542..358b721ea 100644 --- a/engine/classes/ElggCrypto.php +++ b/engine/classes/ElggCrypto.php @@ -15,70 +15,145 @@ class ElggCrypto { const CHARS_PASSWORD = 'bcdfghjklmnpqrstvwxyz2346789'; /** - * Returns a string of highly randomized bytes (over the full 8-bit range). + * Generate a string of highly randomized bytes (over the full 8-bit range). * - * This function is better than simply calling mt_rand() or any other built-in - * PHP function because it can return a long string of bytes (compared to < 4 - * bytes normally from mt_rand()) and uses the best available pseudo-random - * source. + * @param int $length Number of bytes needed + * @return string Random bytes * - * @param int $count The number of characters (bytes) to return in the string. - * @return string + * @author George Argyros + * @copyright 2012, George Argyros. All rights reserved. + * @license Modified BSD + * @link https://github.com/GeorgeArgyros/Secure-random-bytes-in-PHP/blob/master/srand.php Original * - * @copyright Copyright 2001 - 2012 by the original authors - * https://github.com/drupal/drupal/blob/7.x/COPYRIGHT.txt - * @license https://github.com/drupal/drupal/blob/7.x/LICENSE.txt GPL 2 + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. * - * @see https://github.com/drupal/drupal/blob/7.x/includes/bootstrap.inc#L1942 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL GEORGE ARGYROS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - public static function getRandomBytes($count) { - // $random_state does not use drupal_static as it stores random bytes. - static $random_state, $bytes, $php_compatible; - // Initialize on the first call. The contents of $_SERVER includes a mix of - // user-specific and system information that varies a little with each page. - if (!isset($random_state)) { - $random_state = print_r($_SERVER, true); - if (function_exists('getmypid')) { - // Further initialize with the somewhat random PHP process ID. - $random_state .= getmypid(); + public function getRandomBytes($length) { + /** + * Our primary choice for a cryptographic strong randomness function is + * openssl_random_pseudo_bytes. 
+ */ + $SSLstr = '4'; // http://xkcd.com/221/ + if (function_exists('openssl_random_pseudo_bytes') + && (version_compare(PHP_VERSION, '5.3.4') >= 0 || substr(PHP_OS, 0, 3) !== 'WIN')) { + $SSLstr = openssl_random_pseudo_bytes($length, $strong); + if ($strong) { + return $SSLstr; } - $bytes = ''; } - if (strlen($bytes) < $count) { - // PHP versions prior 5.3.4 experienced openssl_random_pseudo_bytes() - // locking on Windows and rendered it unusable. - if (!isset($php_compatible)) { - $php_compatible = version_compare(PHP_VERSION, '5.3.4', '>='); + + /** + * If mcrypt extension is available then we use it to gather entropy from + * the operating system's PRNG. This is better than reading /dev/urandom + * directly since it avoids reading larger blocks of data than needed. + * Older versions of mcrypt_create_iv may be broken or take too much time + * to finish so we only use this function with PHP 5.3.7 and above. + * @see https://bugs.php.net/bug.php?id=55169 + */ + if (function_exists('mcrypt_create_iv') + && (version_compare(PHP_VERSION, '5.3.7') >= 0 || substr(PHP_OS, 0, 3) !== 'WIN')) { + $str = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM); + if ($str !== false) { + return $str; + } + } + + /** + * No build-in crypto randomness function found. We collect any entropy + * available in the PHP core PRNGs along with some filesystem info and memory + * stats. To make this data cryptographically strong we add data either from + * /dev/urandom or if its unavailable, we gather entropy by measuring the + * time needed to compute a number of SHA-1 hashes. 
+ */ + $str = ''; + $bits_per_round = 2; // bits of entropy collected in each clock drift round + $msec_per_round = 400; // expected running time of each round in microseconds + $hash_len = 20; // SHA-1 Hash length + $total = $length; // total bytes of entropy to collect + + $handle = @fopen('/dev/urandom', 'rb'); + if ($handle && function_exists('stream_set_read_buffer')) { + @stream_set_read_buffer($handle, 0); + } + + do { + $bytes = ($total > $hash_len)? $hash_len : $total; + $total -= $bytes; + + //collect any entropy available from the PHP system and filesystem + $entropy = rand() . uniqid(mt_rand(), true) . $SSLstr; + $entropy .= implode('', @fstat(@fopen( __FILE__, 'r'))); + $entropy .= memory_get_usage() . getmypid(); + $entropy .= serialize($_ENV) . serialize($_SERVER); + if (function_exists('posix_times')) { + $entropy .= serialize(posix_times()); } - // /dev/urandom is available on many *nix systems and is considered the - // best commonly available pseudo-random source. - if ($fh = @fopen('/dev/urandom', 'rb')) { - // PHP only performs buffered reads, so in reality it will always read - // at least 4096 bytes. Thus, it costs nothing extra to read and store - // that much so as to speed any additional invocations. - $bytes .= fread($fh, max(4096, $count)); - fclose($fh); - } elseif ($php_compatible && function_exists('openssl_random_pseudo_bytes')) { - // openssl_random_pseudo_bytes() will find entropy in a system-dependent - // way. - $bytes .= openssl_random_pseudo_bytes($count - strlen($bytes)); + if (function_exists('zend_thread_id')) { + $entropy .= zend_thread_id(); } - // If /dev/urandom is not available or returns no bytes, this loop will - // generate a good set of pseudo-random bytes on any system. 
- // Note that it may be important that our $random_state is passed - // through hash() prior to being rolled into $output, that the two hash() - // invocations are different, and that the extra input into the first one - - // the microtime() - is prepended rather than appended. This is to avoid - // directly leaking $random_state via the $output stream, which could - // allow for trivial prediction of further "random" numbers. - while (strlen($bytes) < $count) { - $random_state = hash('sha256', microtime() . mt_rand() . $random_state); - $bytes .= hash('sha256', mt_rand() . $random_state, true); + + if ($handle) { + $entropy .= @fread($handle, $bytes); + } else { + // Measure the time that the operations will take on average + for ($i = 0; $i < 3; $i++) { + $c1 = microtime(true); + $var = sha1(mt_rand()); + for ($j = 0; $j < 50; $j++) { + $var = sha1($var); + } + $c2 = microtime(true); + $entropy .= $c1 . $c2; + } + + // Based on the above measurement determine the total rounds + // in order to bound the total running time. + $rounds = (int) ($msec_per_round * 50 / (int) (($c2 - $c1) * 1000000)); + + // Take the additional measurements. On average we can expect + // at least $bits_per_round bits of entropy from each measurement. + $iter = $bytes * (int) (ceil(8 / $bits_per_round)); + + for ($i = 0; $i < $iter; $i++) { + $c1 = microtime(); + $var = sha1(mt_rand()); + for ($j = 0; $j < $rounds; $j++) { + $var = sha1($var); + } + $c2 = microtime(); + $entropy .= $c1 . $c2; + } } + + // We assume sha1 is a deterministic extractor for the $entropy variable. 
+ $str .= sha1($entropy, true); + + } while ($length > strlen($str)); + + if ($handle) { + @fclose($handle); } - $output = substr($bytes, 0, $count); - $bytes = substr($bytes, $count); - return $output; + + return substr($str, 0, $length); } /** -- cgit v1.2.3 From 003946eff06fcafe60db5894e1ade0abee7314b4 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Thu, 13 Jun 2013 14:10:35 -0400 Subject: code style --- engine/classes/ElggCrypto.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/engine/classes/ElggCrypto.php b/engine/classes/ElggCrypto.php index 358b721ea..b6a8b2024 100644 --- a/engine/classes/ElggCrypto.php +++ b/engine/classes/ElggCrypto.php @@ -175,8 +175,7 @@ class ElggCrypto { * * @see https://github.com/zendframework/zf2/blob/master/library/Zend/Math/Rand.php#L179 */ - public static function getRandomString($length, $chars = null) - { + public static function getRandomString($length, $chars = null) { if ($length < 1) { throw new InvalidArgumentException('Length should be >= 1'); } -- cgit v1.2.3 From 0900be47471476e9aaeac2877779d0051b2c2d4e Mon Sep 17 00:00:00 2001 From: Sem Date: Mon, 4 Nov 2013 21:30:24 +0100 Subject: Upgrade htmLawed to 1.1.16 --- mod/htmlawed/vendors/htmLawed/htmLawed.php | 82 +- mod/htmlawed/vendors/htmLawed/htmLawedTest.php | 89 +- mod/htmlawed/vendors/htmLawed/htmLawed_README.htm | 4336 ++++++++++---------- mod/htmlawed/vendors/htmLawed/htmLawed_README.txt | 3433 ++++++++-------- .../vendors/htmLawed/htmLawed_TESTCASE.txt | 66 +- 5 files changed, 4089 insertions(+), 3917 deletions(-) mode change 100755 => 100644 mod/htmlawed/vendors/htmLawed/htmLawed.php mode change 100755 => 100644 mod/htmlawed/vendors/htmLawed/htmLawedTest.php mode change 100755 => 100644 mod/htmlawed/vendors/htmLawed/htmLawed_README.txt mode change 100755 => 100644 mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed.php b/mod/htmlawed/vendors/htmLawed/htmLawed.php old mode 100755 new mode 
100644 index 0d9624961..63f8c4162 --- a/mod/htmlawed/vendors/htmLawed/htmLawed.php +++ b/mod/htmlawed/vendors/htmLawed/htmLawed.php @@ -1,9 +1,9 @@ ', $x, ''; + } + elseif($do < 3 or isset($ok['#pcdata'])){echo $x;} elseif(strpos($x, "\x02\x04")){ foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){ echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : '')); @@ -202,7 +205,7 @@ for($i=-1, $ci=count($t); ++$i<$ci;){ }elseif($do > 4){echo preg_replace('`\S`', '', $x);} } // get markup - if(!preg_match('`^(/?)([a-zA-Z1-6]+)([^>]*)>(.*)`sm', $t[$i], $r)){$x = $t[$i]; continue;} + if(!preg_match('`^(/?)([a-z1-6]+)([^>]*)>(.*)`sm', $t[$i], $r)){$x = $t[$i]; continue;} $s = null; $e = null; $a = null; $x = null; list($all, $s, $e, $a, $x) = $r; // close tag if($s){ @@ -333,7 +336,7 @@ $c = isset($C['schemes'][$c]) ? $C['schemes'][$c] : $C['schemes']['*']; static $d = 'denied:'; if(isset($c['!']) && substr($p, 0, 7) != $d){$p = "$d$p";} if(isset($c['*']) or !strcspn($p, '#?;') or (substr($p, 0, 7) == $d)){return "{$b}{$p}{$a}";} // All ok, frag, query, param -if(preg_match('`^([a-z\d\-+.&#; ]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $p, $m) && !isset($c[strtolower($m[1])])){ // Denied prot +if(preg_match('`^([^:?[@!$()*,=/\'\]]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $p, $m) && !isset($c[strtolower($m[1])])){ // Denied prot return "{$b}{$d}{$p}{$a}"; } if($C['abs_url']){ @@ -488,7 +491,7 @@ global $S; $rl = isset($S[$e]) ? $S[$e] : array(); $a = array(); $nfr = 0; foreach($aA as $k=>$v){ - if(((isset($C['deny_attribute']['*']) ? isset($C['deny_attribute'][$k]) : !isset($C['deny_attribute'][$k])) or isset($rl[$k])) && ((!isset($rl['n'][$k]) && !isset($rl['n']['*'])) or isset($rl[$k])) && (isset($aN[$k][$e]) or (isset($aNU[$k]) && !isset($aNU[$k][$e])))){ + if(((isset($C['deny_attribute']['*']) ? 
isset($C['deny_attribute'][$k]) : !isset($C['deny_attribute'][$k])) && (isset($aN[$k][$e]) or (isset($aNU[$k]) && !isset($aNU[$k][$e]))) && !isset($rl['n'][$k]) && !isset($rl['n']['*'])) or isset($rl[$k])){ if(isset($aNE[$k])){$v = $k;} elseif(!empty($lcase) && (($e != 'button' or $e != 'input') or $k == 'type')){ // Rather loose but ?not cause issues $v = (isset($aNL[($v2 = strtolower($v))])) ? $v2 : $v; @@ -622,7 +625,7 @@ if($e == 'u'){$e = 'span'; return 'text-decoration: underline;';} static $fs = array('0'=>'xx-small', '1'=>'xx-small', '2'=>'small', '3'=>'medium', '4'=>'large', '5'=>'x-large', '6'=>'xx-large', '7'=>'300%', '-1'=>'smaller', '-2'=>'60%', '+1'=>'larger', '+2'=>'150%', '+3'=>'200%', '+4'=>'300%'); if($e == 'font'){ $a2 = ''; - if(preg_match('`face\s*=\s*(\'|")([^=]+?)\\1`i', $a, $m) or preg_match('`face\s*=\s*([^"])(\S+)`i', $a, $m)){ + if(preg_match('`face\s*=\s*(\'|")([^=]+?)\\1`i', $a, $m) or preg_match('`face\s*=(\s*)(\S+)`i', $a, $m)){ $a2 .= ' font-family: '. str_replace('"', '\'', trim($m[2])). ';'; } if(preg_match('`color\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m)){ @@ -641,41 +644,50 @@ return ''; function hl_tidy($t, $w, $p){ // Tidy/compact HTM if(strpos(' pre,script,textarea', "$p,")){return $t;} -$t = str_replace(' ]*(?)\s+`', '`\s+`', '`(<\w[^>]*(?) `'), array(' $1', ' ', '$1'), preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)()`sm'), create_function('$m', 'return $m[1]. str_replace(array("<", ">", "\n", "\r", "\t", " "), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). $m[4];'), $t))); +$t = preg_replace('`\s+`', ' ', preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)()`sm'), create_function('$m', 'return $m[1]. str_replace(array("<", ">", "\n", "\r", "\t", " "), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). 
$m[4];'), $t)); if(($w = strtolower($w)) == -1){ return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t); } $s = strpos(" $w", 't') ? "\t" : ' '; $s = preg_match('`\d`', $w, $m) ? str_repeat($s, $m[0]) : str_repeat($s, ($s == "\t" ? 1 : 2)); -$n = preg_match('`[ts]([1-9])`', $w, $m) ? $m[1] : 0; +$N = preg_match('`[ts]([1-9])`', $w, $m) ? $m[1] : 0; $a = array('br'=>1); -$b = array('button'=>1, 'input'=>1, 'option'=>1); +$b = array('button'=>1, 'input'=>1, 'option'=>1, 'param'=>1); $c = array('caption'=>1, 'dd'=>1, 'dt'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'isindex'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'object'=>1, 'p'=>1, 'pre'=>1, 'td'=>1, 'textarea'=>1, 'th'=>1); -$d = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'colgroup'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'fieldset'=>1, 'form'=>1, 'hr'=>1, 'iframe'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1); -ob_start(); -if(isset($d[$p])){echo str_repeat($s, ++$n);} -$t = explode('<', $t); -echo ltrim(array_shift($t)); -for($i=-1, $j=count($t); ++$i<$j;){ - $r = ''; list($e, $r) = explode('>', $t[$i]); - $x = $e[0] == '/' ? 0 : (substr($e, -1) == '/' ? 1 : ($e[0] != '!' ? 2 : -1)); - $y = !$x ? ltrim($e, '/') : ($x > 0 ? substr($e, 0, strcspn($e, ' ')) : 0); - $e = "<$e>"; - if(isset($d[$y])){ - if(!$x){echo "\n", str_repeat($s, --$n), "$e\n", str_repeat($s, $n);} - else{echo "\n", str_repeat($s, $n), "$e\n", str_repeat($s, ($x != 1 ? 
++$n : $n));} - echo ltrim($r); continue; +$d = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'colgroup'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'fieldset'=>1, 'form'=>1, 'hr'=>1, 'iframe'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1); +$T = explode('<', $t); +$X = 1; +while($X){ + $n = $N; + $t = $T; + ob_start(); + if(isset($d[$p])){echo str_repeat($s, ++$n);} + echo ltrim(array_shift($t)); + for($i=-1, $j=count($t); ++$i<$j;){ + $r = ''; list($e, $r) = explode('>', $t[$i]); + $x = $e[0] == '/' ? 0 : (substr($e, -1) == '/' ? 1 : ($e[0] != '!' ? 2 : -1)); + $y = !$x ? ltrim($e, '/') : ($x > 0 ? substr($e, 0, strcspn($e, ' ')) : 0); + $e = "<$e>"; + if(isset($d[$y])){ + if(!$x){ + if($n){echo "\n", str_repeat($s, --$n), "$e\n", str_repeat($s, $n);} + else{++$N; ob_end_clean(); continue 2;} + } + else{echo "\n", str_repeat($s, $n), "$e\n", str_repeat($s, ($x != 1 ? ++$n : $n));} + echo $r; continue; + } + $f = "\n". str_repeat($s, $n); + if(isset($c[$y])){ + if(!$x){echo $e, $f, $r;} + else{echo $f, $e, $r;} + }elseif(isset($b[$y])){echo $f, $e, $r; + }elseif(isset($a[$y])){echo $e, $f, $r; + }elseif(!$y){echo $f, $e, $f, $r; + }else{echo $e, $r;} } - $f = "\n". str_repeat($s, $n); - if(isset($c[$y])){ - if(!$x){echo $e, $f, ltrim($r);} - else{echo $f, $e, $r;} - }elseif(isset($b[$y])){echo $f, $e, $r; - }elseif(isset($a[$y])){echo $e, $f, ltrim($r); - }elseif(!$y){echo $f, $e, $f, ltrim($r); - }else{echo $e, $r;} -} -$t = preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents()); + $X = 0; +} +$t = str_replace(array("\n ", " \n"), "\n", preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents())); ob_end_clean(); if(($l = strpos(" $w", 'r') ? (strpos(" $w", 'n') ? 
"\r\n" : "\r") : 0)){ $t = str_replace("\n", $l, $t); @@ -686,7 +698,7 @@ return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array( function hl_version(){ // rel -return '1.1.11'; +return '1.1.16'; // eof } diff --git a/mod/htmlawed/vendors/htmLawed/htmLawedTest.php b/mod/htmlawed/vendors/htmLawed/htmLawedTest.php old mode 100755 new mode 100644 index 806aa4641..3a5b92155 --- a/mod/htmlawed/vendors/htmLawed/htmLawedTest.php +++ b/mod/htmlawed/vendors/htmLawed/htmLawedTest.php @@ -1,10 +1,10 @@ /g, '>'); - i = i.replace(/'; if(f.style){f.style.display = 'none';} else{f.visibility = 'hidden';} - f.innerHTML = '

    '; + f.innerHTML = '

    '; f.action = 'htmLawedTest.php?pre=1'; f.target = 'hlprehtm'; f.method = 'post'; + var t = document.createElement('textarea'); + t.name = 'inputH'; + t.value = i; + f.appendChild(t); var b = document.getElementsByTagName('body')[0]; b.appendChild(f); f.submit(); @@ -284,9 +286,6 @@ function sndValidn(id, type){ var i = document.getElementById(id); if(!i){return;} i = i.value; - i = i.replace(/>/g, '>'); - i = i.replace(/'; if(f.style){f.style.display = 'none';} else{f.visibility = 'hidden';} - f.innerHTML = '

    '; + f.innerHTML = '

    '; f.action = 'http://validator.w3.org/check'; f.target = 'validate'+id+type; + var t = document.createElement('textarea'); + t.name = 'fragment'; + t.value = i; + f.appendChild(t); var b = document.getElementsByTagName('body')[0]; b.appendChild(f); f.submit(); @@ -376,6 +379,58 @@ tRs = { } }; tRs.adEv(window, 'load', tRs.adBtn); +// Diff Match and Patch javascript code by Neil Fraser; Apache license 2.0; http://code.google.com/p/google-diff-match-patch/ +(function(){function diff_match_patch(){this.Diff_Timeout=1;this.Diff_EditCost=4;this.Match_Threshold=0.5;this.Match_Distance=1E3;this.Patch_DeleteThreshold=0.5;this.Patch_Margin=4;this.Match_MaxBits=32} +diff_match_patch.prototype.diff_main=function(a,b,c,d){"undefined"==typeof d&&(d=0>=this.Diff_Timeout?Number.MAX_VALUE:(new Date).getTime()+1E3*this.Diff_Timeout);if(null==a||null==b)throw Error("Null input. (diff_main)");if(a==b)return a?[[0,a]]:[];"undefined"==typeof c&&(c=!0);var e=c,f=this.diff_commonPrefix(a,b),c=a.substring(0,f),a=a.substring(f),b=b.substring(f),f=this.diff_commonSuffix(a,b),g=a.substring(a.length-f),a=a.substring(0,a.length-f),b=b.substring(0,b.length-f),a=this.diff_compute_(a, +b,e,d);c&&a.unshift([0,c]);g&&a.push([0,g]);this.diff_cleanupMerge(a);return a}; +diff_match_patch.prototype.diff_compute_=function(a,b,c,d){if(!a)return[[1,b]];if(!b)return[[-1,a]];var e=a.length>b.length?a:b,f=a.length>b.length?b:a,g=e.indexOf(f);if(-1!=g)return c=[[1,e.substring(0,g)],[0,f],[1,e.substring(g+f.length)]],a.length>b.length&&(c[0][0]=c[2][0]=-1),c;if(1==f.length)return[[-1,a],[1,b]];return(e=this.diff_halfMatch_(a,b))?(f=e[0],a=e[1],g=e[2],b=e[3],e=e[4],f=this.diff_main(f,g,c,d),c=this.diff_main(a,b,c,d),f.concat([[0,e]],c)):c&&100c);u++){for(var n=-u+q;n<=u-s;n+=2){var l=g+n,m;m=n==-u||n!=u&&j[l-1]d)s+=2;else if(r>e)q+=2;else if(p&&(l=g+k-n,0<=l&&l= +t)return this.diff_bisectSplit_(a,b,m,r,c)}}for(n=-u+o;n<=u-v;n+=2){l=g+n;t=n==-u||n!=u&&i[l-1]d)v+=2;else if(m>e)o+=2;else 
if(!p&&(l=g+k-n,0<=l&&l=t)))return this.diff_bisectSplit_(a,b,m,r,c)}}return[[-1,a],[1,b]]}; +diff_match_patch.prototype.diff_bisectSplit_=function(a,b,c,d,e){var f=a.substring(0,c),g=b.substring(0,d),a=a.substring(c),b=b.substring(d),f=this.diff_main(f,g,!1,e),e=this.diff_main(a,b,!1,e);return f.concat(e)}; +diff_match_patch.prototype.diff_linesToChars_=function(a,b){function c(a){for(var b="",c=0,f=-1,g=d.length;fd?a=a.substring(c-d):c=a.length?[h,j,n,l,g]:null}if(0>=this.Diff_Timeout)return null; +var d=a.length>b.length?a:b,e=a.length>b.length?b:a;if(4>d.length||2*e.lengthd[4].length?g:d:d:g;var j;a.length>b.length?(g=h[0],d=h[1],e=h[2],j=h[3]):(e=h[0],j=h[1],g=h[2],d=h[3]);h=h[4];return[g,d,e,j,h]}; +diff_match_patch.prototype.diff_cleanupSemantic=function(a){for(var b=!1,c=[],d=0,e=null,f=0,g=0,h=0,j=0,i=0;f=e){if(d>=b.length/2||d>=c.length/2)a.splice(f,0,[0,c.substring(0,d)]),a[f-1][1]=b.substring(0,b.length-d),a[f+1][1]=c.substring(d),f++}else if(e>=b.length/2||e>=c.length/2)a.splice(f,0,[0,b.substring(0,e)]),a[f-1][0]=1,a[f-1][1]=c.substring(0,c.length-e),a[f+1][0]=-1,a[f+1][1]=b.substring(e),f++;f++}f++}}; +diff_match_patch.prototype.diff_cleanupSemanticLossless=function(a){function b(a,b){if(!a||!b)return 6;var c=a.charAt(a.length-1),d=b.charAt(0),e=c.match(diff_match_patch.nonAlphaNumericRegex_),f=d.match(diff_match_patch.nonAlphaNumericRegex_),g=e&&c.match(diff_match_patch.whitespaceRegex_),h=f&&d.match(diff_match_patch.whitespaceRegex_),c=g&&c.match(diff_match_patch.linebreakRegex_),d=h&&d.match(diff_match_patch.linebreakRegex_),i=c&&a.match(diff_match_patch.blanklineEndRegex_),j=d&&b.match(diff_match_patch.blanklineStartRegex_); +return i||j?5:c||d?4:e&&!g&&h?3:g||h?2:e||f?1:0}for(var c=1;c=i&&(i=k,g=d,h=e,j=f)}a[c-1][1]!=g&&(g?a[c-1][1]=g:(a.splice(c-1,1),c--),a[c][1]= 
+h,j?a[c+1][1]=j:(a.splice(c+1,1),c--))}c++}};diff_match_patch.nonAlphaNumericRegex_=/[^a-zA-Z0-9]/;diff_match_patch.whitespaceRegex_=/\s/;diff_match_patch.linebreakRegex_=/[\r\n]/;diff_match_patch.blanklineEndRegex_=/\n\r?\n$/;diff_match_patch.blanklineStartRegex_=/^\r?\n\r?\n/; +diff_match_patch.prototype.diff_cleanupEfficiency=function(a){for(var b=!1,c=[],d=0,e=null,f=0,g=!1,h=!1,j=!1,i=!1;fb)break;e=c;f=d}return a.length!=g&&-1===a[g][0]?f:f+(b-e)}; +diff_match_patch.prototype.diff_prettyHtml=function(a){for(var b=[],c=/&/g,d=//g,f=/\n/g,g=0;g¬
    ");switch(h){case 1:b[g]=''+j+"";break;case -1:b[g]=''+j+"";break;case 0:b[g]=""+j+""}}return b.join("")}; +diff_match_patch.prototype.diff_text1=function(a){for(var b=[],c=0;cthis.Match_MaxBits)throw Error("Pattern too long for this browser.");var e=this.match_alphabet_(b),f=this,g=this.Match_Threshold,h=a.indexOf(b,c);-1!=h&&(g=Math.min(d(0,h),g),h=a.lastIndexOf(b,c+b.length),-1!=h&&(g=Math.min(d(0,h),g)));for(var j=1<=i;o--){var v=e[a.charAt(o-1)];k[o]=0===s?(k[o+1]<<1|1)&v:(k[o+1]<<1|1)&v|(q[o+1]|q[o])<<1|1|q[o+1];if(k[o]&j&&(v=d(s,o-1),v<=g))if(g=v,h=o-1,h>c)i=Math.max(1,2*c-h);else break}if(d(s+1,c)>g)break;q=k}return h}; +diff_match_patch.prototype.match_alphabet_=function(a){for(var b={},c=0;c=2*this.Patch_Margin&& +e&&(this.patch_addContext_(a,h),c.push(a),a=new diff_match_patch.patch_obj,e=0,h=d,f=g)}1!==i&&(f+=k.length);-1!==i&&(g+=k.length)}e&&(this.patch_addContext_(a,h),c.push(a));return c};diff_match_patch.prototype.patch_deepCopy=function(a){for(var b=[],c=0;cthis.Match_MaxBits){if(j=this.match_main(b,h.substring(0,this.Match_MaxBits),g),-1!=j&&(i=this.match_main(b,h.substring(h.length-this.Match_MaxBits),g+h.length-this.Match_MaxBits),-1==i||j>=i))j=-1}else j=this.match_main(b,h,g); +if(-1==j)e[f]=!1,d-=a[f].length2-a[f].length1;else if(e[f]=!0,d=j-g,g=-1==i?b.substring(j,j+h.length):b.substring(j,i+this.Match_MaxBits),h==g)b=b.substring(0,j)+this.diff_text2(a[f].diffs)+b.substring(j+h.length);else if(g=this.diff_main(h,g,!1),h.length>this.Match_MaxBits&&this.diff_levenshtein(g)/h.length>this.Patch_DeleteThreshold)e[f]=!1;else{this.diff_cleanupSemanticLossless(g);for(var h=0,k,i=0;ie[0][1].length){var f=b-e[0][1].length;e[0][1]=c.substring(e[0][1].length)+e[0][1];d.start1-=f;d.start2-=f;d.length1+=f;d.length2+=f}d=a[a.length-1];e=d.diffs;0==e.length||0!=e[e.length-1][0]?(e.push([0, +c]),d.length1+=b,d.length2+=b):b>e[e.length-1][1].length&&(f=b-e[e.length-1][1].length,e[e.length-1][1]+=c.substring(0,f),d.length1+=f,d.length2+=f);return c}; 
+diff_match_patch.prototype.patch_splitMax=function(a){for(var b=this.Match_MaxBits,c=0;c2*b?(h.length1+=i.length,e+=i.length,j=!1,h.diffs.push([g,i]),d.diffs.shift()):(i=i.substring(0,b-h.length1-this.Patch_Margin),h.length1+=i.length,e+=i.length,0===g?(h.length2+=i.length,f+=i.length):j=!1,h.diffs.push([g,i]),i==d.diffs[0][1]?d.diffs.shift():d.diffs[0][1]=d.diffs[0][1].substring(i.length))}g=this.diff_text2(h.diffs);g=g.substring(g.length-this.Patch_Margin);i=this.diff_text1(d.diffs).substring(0,this.Patch_Margin);""!==i&& +(h.length1+=i.length,h.length2+=i.length,0!==h.diffs.length&&0===h.diffs[h.diffs.length-1][0]?h.diffs[h.diffs.length-1][1]+=i:h.diffs.push([0,i]));j||a.splice(++c,0,h)}}};diff_match_patch.prototype.patch_toText=function(a){for(var b=[],c=0;c htmLawed (<?php echo hl_version();?>) test @@ -545,7 +600,7 @@ if($do){ $st = microtime(); $out = htmLawed($_POST['text'], $cfg, $_POST['spec']); $et = microtime(); - echo '
    Input code » ', strlen($_POST['text']), ' chars, ~', ($tag = round((substr_count($_POST['text'], '>') + substr_count($_POST['text'], '<'))/2)), ' tag', ($tag > 1 ? 's' : ''), ' ', (!isset($_POST['text'][$_hlimit]) ? ' Input binary » ' : ''), ' Finalized internal settings »  ', '
    Output » htmLawed processing time ', number_format(((substr($et,0,9)) + (substr($et,-10)) - (substr($st,0,9)) - (substr($st,-10))),4), ' s', (($mem = memory_get_peak_usage()) !== false ? ', peak memory usage '. round(($mem-$pre_mem)/1048576, 2). ' MB' : ''), '
    '; + echo '
    Input code » ', strlen($_POST['text']), ' chars, ~', ($tag = round((substr_count($_POST['text'], '>') + substr_count($_POST['text'], '<'))/2)), ' tag', ($tag > 1 ? 's' : ''), ' ', (!isset($_POST['text'][$_hlimit]) ? ' Input binary » ' : ''), ' Finalized internal settings »  ', '
    Output » htmLawed processing time ', number_format(((substr($et,0,9)) + (substr($et,-10)) - (substr($st,0,9)) - (substr($st,-10))),4), ' s', (($mem = memory_get_peak_usage()) !== false ? ', peak memory usage '. round(($mem-$pre_mem)/1048576, 2). ' MB' : ''), '
    '; if($_w3c_validate && $validation) { ?> @@ -555,7 +610,7 @@ if($do){
    Output code »
    ', format($out), '
    ', (!isset($_POST['text'][$_hlimit]) ? '
    Output binary »' : ''), '
    Output rendered »
    ', $out, '
    '; + echo '

    Output code »
    ', format($out), '
    ', (!isset($_POST['text'][$_hlimit]) ? ' Output binary »' : ''), ' Diff »
    Output rendered »
    ', $out, '
    '; } else{ ?> diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed_README.htm b/mod/htmlawed/vendors/htmLawed/htmLawed_README.htm index 6dd78fb2e..819b39e06 100644 --- a/mod/htmlawed/vendors/htmLawed/htmLawed_README.htm +++ b/mod/htmlawed/vendors/htmLawed/htmLawed_README.htm @@ -1,2160 +1,2178 @@ - - - - - - - - -htmLawed documentation | htmLawed PHP software is a free, open-source, customizable HTML input purifier and filter - - -
    -

    htmLawed documentation

    - -
    1  About htmLawed
    1.1  Example uses
    1.2  Features
    1.3  History
    1.4  License & copyright
    1.5  Terms used here
    -2  Usage
    2.1  Simple
    2.2  Configuring htmLawed using the $config parameter
    2.3  Extra HTML specifications using the $spec parameter
    2.4  Performance time & memory usage
    2.5  Some security risks to keep in mind
    2.6  Use without modifying old kses() code
    2.7  Tolerance for ill-written HTML
    2.8  Limitations & work-arounds
    2.9  Examples of usage
    -3  Details
    3.1  Invalid/dangerous characters
    3.2  Character references/entities
    3.3  HTML elements
    -    3.3.1  HTML comments and CDATA sections
    -    3.3.2  Tag-transformation for better XHTML-Strict
    -    3.3.3  Tag balancing and proper nesting
    -    3.3.4  Elements requiring child elements
    -    3.3.5  Beautify or compact HTML
    3.4  Attributes
    -    3.4.1  Auto-addition of XHTML-required attributes
    -    3.4.2  Duplicate/invalid id values
    -    3.4.3  URL schemes (protocols) and scripts in attribute values
    -    3.4.4  Absolute & relative URLs
    -    3.4.5  Lower-cased, standard attribute values
    -    3.4.6  Transformation of deprecated attributes
    -    3.4.7  Anti-spam & href
    -    3.4.8  Inline style properties
    -    3.4.9  Hook function for tag content
    3.5  Simple configuration directive for most valid XHTML
    3.6  Simple configuration directive for most safe HTML
    3.7  Using a hook function
    3.8  Obtaining finalized parameter values
    3.9  Retaining non-HTML tags in input with mixed markup
    -4  Other
    4.1  Support
    4.2  Known issues
    4.3  Change-log
    4.4  Testing
    4.5  Upgrade, & old versions
    4.6  Comparison with HTMLPurifier
    4.7  Use through application plug-ins/modules
    4.8  Use in non-PHP applications
    4.9  Donate
    4.10  Acknowledgements
    -5  Appendices
    5.1  Characters discouraged in HTML
    5.2  Valid attribute-element combinations
    5.3  CSS 2.1 properties accepting URLs
    5.4  Microsoft Windows 1252 character replacements
    5.5  URL format
    5.6  Brief on htmLawed code
    - -
    -
    -
    htmLawed_README.txt, 8 June 2012
    -htmLawed 1.1.11, 5 June 2012
    -Copyright Santosh Patnaik
    -Dual licensed with LGPL 3 and GPL 2 or later
    -A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed 
    -
    - -

    -1  About htmLawed -

    (to top)
    -
    -  htmLawed is a highly customizable single-file PHP script to make text secure, and standard- and admin policy-compliant for use in the body of HTML 4, XHTML 1 or 1.1, or generic XML documents. It is thus a configurable input (X)HTML filter, processor, purifier, sanitizer, beautifier, etc., and an alternative to the HTMLTidy application.
    -
    -  The lawing in of input text is needed to ensure that HTML code in the text is standard-compliant, does not introduce security vulnerabilities, and does not break the aesthetics, design or layout of web-pages. htmLawed tries to do this by, for example, making HTML well-formed with balanced and properly nested tags, neutralizing code that may be used for cross-site scripting (XSS) attacks, and allowing only specified HTML elements/tags and attributes.
    - -

    -1.1  Example uses -

    (to top)
    -
    -  *  Filtering of text submitted as comments on blogs to allow only certain HTML elements
    -
    -  *  Making RSS/Atom newsfeed item-content standard-compliant: often one uses an excerpt from an HTML document for the content, and with unbalanced tags, non-numerical entities, etc., such excerpts may not be XML-compliant
    -
    -  *  Text processing for stricter XML standard-compliance: e.g., to have lowercased x in hexadecimal numeric entities becomes necessary if an XHTML document with MathML content needs to be served as application/xml
    -
    -  *  Scraping text or data from web-pages
    -
    -  *  Pretty-printing HTML code
    - -
    -

    -1.2  Features -

    (to top)
    -
    -  Key: * security feature, ^ standard compliance, ~ requires setting right options, ` different from Kses
    -
    -  *  make input more secure and standard-compliant
    -  *  use for HTML 4, XHTML 1.0 or 1.1, or even generic XML documents  ^~`
    -
    -  *  beautify or compact HTML  ^~`
    -
    -  *  restrict elements  ^~`
    -  *  proper closure of empty elements like img  ^`
    -  *  transform deprecated elements like u  ^~`
    -  *  HTML comments and CDATA sections can be permitted  ^~`
    -  *  elements like script, object and form can be permitted  ~
    -
    -  *  restrict attributes, including element-specifically  ^~`
    -  *  remove invalid attributes  ^`
    -  *  element and attribute names are lower-cased  ^
    -  *  provide required attributes, like alt for image  ^`
    -  *  transform deprecated attributes  ^~`
    -  *  attributes declared only once  ^`
    -
    -  *  restrict attribute values, including element-specifically  ^~`
    -  *  a value is declared for empty (minimized) attributes like checked  ^
    -  *  check for potentially dangerous attribute values  *~
    -  *  ensure unique id attribute values  ^~`
    -  *  double-quote attribute values  ^
    -  *  lower-case standard attribute values like password  ^`
    -
    -  *  attribute-specific URL protocol/scheme restriction  *~`
    -  *  disable dynamic expressions in style values  *~`
    -
    -  *  neutralize invalid named character entities  ^`
    -  *  convert hexadecimal numeric entities to decimal ones, or vice versa  ^~`
    -  *  convert named entities to numeric ones for generic XML use  ^~`
    -
    -  *  remove null characters  *
    -  *  neutralize potentially dangerous proprietary Netscape Javascript entities  *
    -  *  replace potentially dangerous soft-hyphen character in URL-accepting attribute values with spaces  *
    -
    -  *  remove common invalid characters not allowed in HTML or XML  ^`
    -  *  replace characters from Microsoft applications like Word that are discouraged in HTML or XML  ^~`
    -  *  neutralize entities for characters invalid or discouraged in HTML or XML  ^`
    -  *  appropriately neutralize <, &, ", and > characters  ^*`
    -
    -  *  understands improperly spaced tag content (like, spread over more than a line) and properly spaces them  `
    -  *  attempts to balance tags for well-formedness  ^~`
    -  *  understands when omitable closing tags like </p> (allowed in HTML 4, transitional, e.g.) are missing  ^~`
    -  *  attempts to permit only validly nested tags  ^~`
    -  *  option to remove or neutralize bad content ^~`
    -  *  attempts to rectify common errors of plain-text misplacement (e.g., directly inside blockquote) ^~`
    -
    -  *  fast, non-OOP code of ~45 kb incurring peak basal memory usage of ~0.5 MB
    -  *  compatible with pre-existing code using Kses (the filter used by WordPress)
    -
    -  *  optional anti-spam measures such as addition of rel="nofollow" and link-disabling  ~`
    -  *  optionally makes relative URLs absolute, and vice versa  ~`
    -
    -  *  optionally mark & to identify the entities for &, < and > introduced by htmLawed  ~`
    -
    -  *  allows deployment of powerful hook functions to inject HTML, consolidate style attributes to class, finely check attribute values, etc.  ~`
    -
    -  *  independent of character encoding of input and does not affect it
    -
    -  *  tolerance for ill-written HTML to a certain degree
    - -
    -

    -1.3  History -

    (to top)
    -
    -  htmLawed was developed for use with LabWiki, a wiki software developed at PHP Labware, as a suitable software could not be found. Existing PHP software like Kses and HTMLPurifier were deemed inadequate, slow, resource-intensive, or dependent on external applications like HTML Tidy.
    -
    -  htmLawed started as a modification of Ulf Harnhammar's Kses (version 0.2.2) software, and is compatible with code that uses Kses; see section 2.6.
    - -
    -

    -1.4  License & copyright -

    (to top)
    -
    -  htmLawed is free and open-source software dual licensed under LGPL license version 3 and GPL license version 2 or later, and copyrighted by Santosh Patnaik, MD, PhD.
    - -
    -

    -1.5  Terms used here -

    (to top)
    -
    -  *  administrator - or admin; person setting up the code to pass input through htmLawed; also, user
    -  *  attributes - name-value pairs like href="http://x.com" in opening tags
    -  *  author - writer
    -  *  character - atomic unit of text; internally represented by a numeric code-point as specified by the encoding or charset in use
    -  *  entity - markup like &gt; and &#160; used to refer to a character
    -  *  element - HTML element like a and img
    -  *  element content -  content between the opening and closing tags of an element, like click of <a href="x">click</a>
    -  *  HTML - implies XHTML unless specified otherwise
    -  *  input - text string given to htmLawed to process
    -  *  processing - involves filtering, correction, etc., of input
    -  *  safe - absence or reduction of certain characters and HTML elements and attributes in the input that can otherwise potentially and circumstantially expose web-site users to security vulnerabilities like cross-site scripting attacks (XSS)
    -  *  scheme - URL protocol like http and ftp
    -  *  specs - standard specifications
    -  *  style property - terms like border and height for which declarations are made in values for the style attribute of elements
    -  *  tag - markers like <a href="x"> and </a> delineating element content; the opening tag can contain attributes
    -  *  tag content - consists of tag markers < and >, element names like div, and possibly attributes
    -  *  user - administrator
    -  *  writer - end-user like a blog commenter providing the input that is to be processed; also, author
    - -
    -
    -

    -2  Usage -

    (to top)
    -
    -  htmLawed should work with PHP 4.4 and higher. Either include() the htmLawed.php file or copy-paste the entire code.
    -
    -  To easily test htmLawed using a form-based interface, use the provided demo (htmLawed.php and htmLawedTest.php should be in the same directory on the web-server).
    -
    Note: For code for usage of the htmLawed class (for htmLawed in OOP), please refer to the htmLawed website; the filtering itself can be configured, etc., as described here.
    - -

    -2.1  Simple -

    (to top)
    -
    -  The input text to be processed, $text, is passed as an argument of type string; htmLawed() returns the processed string:
    -
    - -    $processed = htmLawed($text); -
    -
    Note: If input is from a $_GET or $_POST value, and magic quotes are enabled on the PHP setup, run stripslashes() on the input before passing to htmLawed.
    -
    -  By default, htmLawed will process the text allowing all valid HTML elements/tags, secure URL scheme/CSS style properties, etc. It will allow CDATA sections and HTML comments, balance tags, and ensure proper nesting of elements. Such actions can be configured using two other optional arguments -- $config and $spec:
    -
    - -    $processed = htmLawed($text, $config, $spec); -
    -
    -  These extra parameters are detailed below. Some examples are shown in section 2.9.
    -
    Note: For maximum protection against XSS and other scripting attacks (e.g., by disallowing Javascript code), consider using the safe parameter; see section 3.6.
    - -
    -

    -2.2  Configuring htmLawed using the $config parameter -

    (to top)
    -
    $config instructs htmLawed on how to tackle certain tasks. When $config is not specified, or not set as an array (e.g., $config = 1), htmLawed will take default actions. One or many of the task-action or value-specification pairs can be specified in $config as array key-value pairs. If a parameter is not specified, htmLawed will use the default value/action indicated further below.
    -
    - -    $config = array('comment'=>0, 'cdata'=>1); -
    - -    $processed = htmLawed($text, $config); -
    -
    -  Or,
    -
    - -    $processed = htmLawed($text, array('comment'=>0, 'cdata'=>1)); -
    -
    -  Below are the possible value-specification combinations. In PHP code, values that are integers should not be quoted and should be used as numeric types (unless meant as string/text).
    -
    -  Key: * default, ^ different default when htmLawed is used in the Kses-compatible mode (see section 2.6), ~ different default when valid_xhtml is set to 1 (see section 3.5), " different default when safe is set to 1 (see section 3.6)
    -
    abs_url
    -  Make URLs absolute or relative; $config["base_url"] needs to be set; see section 3.4.4
    -
    -1 - make relative
    0 - no action  *
    1 - make absolute
    -
    and_mark
    -  Mark & characters in the original input; see section 3.2
    -
    anti_link_spam
    -  Anti-link-spam measure; see section 3.4.7
    -
    0 - no measure taken  *
    array("regex1", "regex2") - will ensure a rel attribute with nofollow in its value in case the href attribute value matches the regular expression pattern regex1, and/or will remove href if its value matches the regular expression pattern regex2. E.g., array("/./", "/://\W*(?!(abc\.com|xyz\.org))/"); see section 3.4.7 for more.
    -
    anti_mail_spam
    -  Anti-mail-spam measure; see section 3.4.7
    -
    0 - no measure taken  *
    word - @ in mail address in href attribute value is replaced with specified word
    -
    balance
    -  Balance tags for well-formedness and proper nesting; see section 3.3.3
    -
    0 - no
    1 - yes  *
    -
    base_url
    -  Base URL value that needs to be set if $config["abs_url"] is not 0; see section 3.4.4
    -
    cdata
    -  Handling of CDATA sections; see section 3.3.1
    -
    0 - don't consider CDATA sections as markup and proceed as if plain text  ^"
    1 - remove
    2 - allow, but neutralize any <, >, and & inside by converting them to named entities
    3 - allow  *
    -
    clean_ms_char
    -  Replace discouraged characters introduced by Microsoft Word, etc.; see section 3.1
    -
    0 - no  *
    1 - yes
    2 - yes, but replace special single & double quotes with ordinary ones
    -
    comment
    -  Handling of HTML comments; see section 3.3.1
    -
    0 - don't consider comments as markup and proceed as if plain text  ^"
    1 - remove
    2 - allow, but neutralize any <, >, and & inside by converting to named entities
    3 - allow  *
    -
    css_expression
    -  Allow dynamic CSS expression by not removing the expression from CSS property values in style attributes; see section 3.4.8
    -
    0 - remove  *
    1 - allow
    -
    deny_attribute
    -  Denied HTML attributes; see section 3.4
    -
    0 - none  *
    string - dictated by values in string
    on* (like onfocus) attributes not allowed - "
    -
    direct_nest_list
    -  Allow direct nesting of a list within another without requiring it to be a list item; see section 3.3.4
    -
    0 - no  *
    1 - yes
    -
    elements
    -  Allowed HTML elements; see section 3.3
    -
    * -center -dir -font -isindex -menu -s -strike -u -  ~
    applet, embed, iframe, object, script not allowed - "
    -
    hexdec_entity
    -  Allow hexadecimal numeric entities and do not convert to the more widely accepted decimal ones, or convert decimal to hexadecimal ones; see section 3.2
    -
    0 - no
    1 - yes  *
    2 - convert decimal to hexadecimal ones
    -
    hook
    -  Name of an optional hook function to alter the input string, $config or $spec before htmLawed starts its main work; see section 3.7
    -
    0 - no hook function  *
    name - name is name of the hook function (kses_hook  ^)
    -
    hook_tag
    -  Name of an optional hook function to alter tag content finalized by htmLawed; see section 3.4.9
    -
    0 - no hook function  *
    name - name is name of the hook function
    -
    keep_bad
    -  Neutralize bad tags by converting < and > to entities, or remove them; see section 3.3.3
    -
    0 - remove  ^
    1 - neutralize both tags and element content
    2 - remove tags but neutralize element content
    3 and 4 - like 1 and 2 but remove if text (pcdata) is invalid in parent element
    5 and 6 * -  like 3 and 4 but line-breaks, tabs and spaces are left
    -
    lc_std_val
    -  For XHTML compliance, predefined, standard attribute values, like get for the method attribute of form, must be lowercased; see section 3.4.5
    -
    0 - no
    1 - yes  *
    -
    make_tag_strict
    -  Transform/remove these non-strict XHTML elements, even if they are allowed by the admin: applet center dir embed font isindex menu s strike u; see section 3.3.2
    -
    0 - no  ^
    1 - yes, but leave applet, embed and isindex elements that currently can't be transformed  *
    2 - yes, removing applet, embed and isindex elements and their contents (nested elements remain)  ~
    -
    named_entity
    -  Allow non-universal named HTML entities, or convert to numeric ones; see section 3.2
    -
    0 - convert
    1 - allow  *
    -
    no_deprecated_attr
    -  Allow deprecated attributes or transform them; see section 3.4.6
    -
    0 - allow  ^
    1 - transform, but name attributes for a and map are retained  *
    2 - transform
    -
    parent
    -  Name of the parent element, possibly imagined, that will hold the input; see section 3.3
    -
    safe
    -  Magic parameter to make input the most secure against XSS without needing to specify other relevant $config parameters; see section 3.6
    -
    0 - no  *
    1 - will auto-adjust other relevant $config parameters (indicated by " in this list)
    -
    schemes
    -  Array of attribute-specific, comma-separated, lower-cased list of schemes (protocols) allowed in attributes accepting URLs (or ! to deny any URL); * covers all unspecified attributes; see section 3.4.3
    -
    href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https  *
    *: ftp, gopher, http, https, mailto, news, nntp, telnet  ^
    href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; style: !; *:file, http, https  "
    -
    show_setting
    -  Name of a PHP variable to assign the finalized $config and $spec values; see section 3.8
    -
    style_pass
    -  Do not look at style attribute values, letting them through without any alteration
    -
    0 - no *
    1 - htmLawed will let through any style value; see section 3.4.8
    -
    tidy
    -  Beautify or compact HTML code; see section 3.3.5
    -
    -1 - compact
    0 - no  *
    1 or string - beautify (custom format specified by string)
    -
    unique_ids
    id attribute value checks; see section 3.4.2
    -
    0 - no  ^
    1 - remove duplicate and/or invalid ones  *
    word - remove invalid ones and replace duplicate ones with new and unique ones based on the word; the admin-specified word, like my_, should begin with a letter (a-z) and can contain letters, digits, ., _, -, and :.
    -
    valid_xhtml
    -  Magic parameter to make input the most valid XHTML without needing to specify other relevant $config parameters; see section 3.5
    -
    0 - no  *
    1 - will auto-adjust other relevant $config parameters (indicated by ~ in this list)
    -
    xml:lang
    -  Auto-adding xml:lang attribute; see section 3.4.1
    -
    0 - no  *
    1 - add if lang attribute is present
    2 - add if lang attribute is present, and remove lang  ~
    - -
    -

    -2.3  Extra HTML specifications using the $spec parameter -

    (to top)
    -
    -  The $spec argument can be used to disallow an otherwise legal attribute for an element, or to restrict the attribute's values. This can also be helpful as a security measure (e.g., in certain versions of browsers, certain values can cause buffer overflows and denial of service attacks), or in enforcing admin policy compliance. $spec is specified as a string of text containing one or more rules, with multiple rules separated from each other by a semi-colon (;). E.g.,
    -
    - -    $spec = 'i=-*; td, tr=style, id, -*; a=id(match="/[a-z][a-z\d.:\-`"]*/i"/minval=2), href(maxlen=100/minlen=34); img=-width,-alt'; -
    - -    $processed = htmLawed($text, $config, $spec); -
    -
    -  Or,
    -
    - -    $processed = htmLawed($text, $config, 'i=-*; td, tr=style, id, -*; a=id(match="/[a-z][a-z\d.:\-`"]*/i"/minval=2), href(maxlen=100/minlen=34); img=-width,-alt'); -
    -
    -  A rule begins with an HTML element name(s) (rule-element), for which the rule applies, followed by an equal (=) sign. A rule-element may represent multiple elements if comma (,)-separated element names are used. E.g., th,td,tr=.
    -
    -  Rest of the rule consists of comma-separated HTML attribute names. A minus (-) character before an attribute means that the attribute is not permitted inside the rule-element. E.g., -width. To deny all attributes, -* can be used.
    -
    -  Following shows examples of rule excerpts with rule-element a and the attributes that are being permitted:
    -
    -  *  a= - all
    -  *  a=id - all
    -  *  a=href, title, -id, -onclick - all except id and onclick
    -  *  a=*, id, -id - all except id
    -  *  a=-* - none
    -  *  a=-*, href, title - none except href and title
    -  *  a=-*, -id, href, title - none except href and title
    -
    -  Rules regarding attribute values are optionally specified inside round brackets after attribute names in slash ('/')-separated parameter = value pairs. E.g., title(maxlen=30/minlen=5). None, or one or more of the following parameters may be specified:
    -
    -  *  oneof - one or more choices separated by | that the value should match; if only one choice is provided, then the value must match that choice
    -
    -  *  noneof - one or more choices separated by | that the value should not match
    -
    -  *  maxlen and minlen - upper and lower limits for the number of characters in the attribute value; specified in numbers
    -
    -  *  maxval and minval - upper and lower limits for the numerical value specified in the attribute value; specified in numbers
    -
    -  *  match and nomatch - pattern that the attribute value should or should not match; specified as PHP/PCRE-compatible regular expressions with delimiters and possibly modifiers
    -
    -  *  default - a value to force on the attribute if the value provided by the writer does not fit any of the specified parameters
    -
    -  If default is not set and the attribute value does not satisfy any of the specified parameters, then the attribute is removed. The default value can also be used to force all attribute declarations to take the same value (by getting the values declared illegal by setting, e.g., maxlen to -1).
    -
    -  Examples with input <input title="WIDTH" value="10em" /><input title="length" value="5" /> are shown below.
    -
    Rule: input=title(maxlen=60/minlen=6), value
    Output: <input value="10em" /><input title="length" value="5" />
    -
    Rule: input=title(), value(maxval=8/default=6)
    Output: <input title="WIDTH" value="6" /><input title="length" value="5" />
    -
    Rule: input=title(nomatch=%w.d%i), value(match=%em%/default=6em)
    Output: <input value="10em" /><input title="length" value="6em" />
    -
    Rule: input=title(oneof=height|depth/default=depth), value(noneof=5|6)
    Output: <input title="depth" value="10em" /><input title="depth" />
    -
    Special characters: The characters ;, ,, /, (, ), |, ~ and space have special meanings in the rules. Words in the rules that use such characters, or the characters themselves, should be escaped by enclosing in pairs of double-quotes ("). A back-tick (`) can be used to escape a literal ". An example rule illustrating this is input=value(maxlen=30/match="/^\w/"/default="your `"ID`"").
    -
    Note: To deny an attribute for all elements for which it is legal, $config["deny_attribute"] (see section 3.4) can be used instead of $spec. Also, attributes can be allowed element-specifically through $spec while being denied globally through $config["deny_attribute"]. The hook_tag parameter (section 3.4.9) can also be used to implement the $spec functionality.
    - -
    -

    -2.4  Performance time & memory usage -

    (to top)
    -
    -  The time and memory used by htmLawed depend on its configuration and the size of the input, and the amount, nestedness and well-formedness of the HTML markup within it. In particular, tag balancing and beautification each can increase the processing time by about a quarter.
    -
    -  The htmLawed demo can be used to evaluate the performance and effects of different types of input and $config.
    - -
    -

    -2.5  Some security risks to keep in mind -

    (to top)
    -
    -  When setting the parameters/arguments (like those to allow certain HTML elements) for use with htmLawed, one should bear in mind that the setting may let through potentially dangerous HTML code which is meant to steal user-data, deface a website, render a page non-functional, etc.
    -
    -  Unless end-users, either people or software, supplying the content are completely trusted, security issues arising from the degree of HTML usage permission have to be kept in mind. For example, the following increase security risks:
    -
    -  *  Allowing script, applet, embed, iframe or object elements, or certain of their attributes like allowscriptaccess
    -
    -  *  Allowing HTML comments (some Internet Explorer versions are vulnerable with, e.g., <!--[if gte IE 4]><script>alert("xss");</script><![endif]-->)
    -
    -  *  Allowing dynamic CSS expressions (a feature of the IE browser)
    -
    -  *  Allowing the style attribute
    -
    -  To remove insecure HTML, code-developers using htmLawed must set $config appropriately. E.g., $config["elements"] = "* -script" to deny the script element (section 3.3), $config["safe"] = 1 to auto-configure certain htmLawed parameters for maximizing security (section 3.6), etc.
    -
    -  Permitting the *style* attribute brings in risks of click-jacking, phishing, web-page overlays, etc., even when the safe parameter is enabled (see section 3.6). Except for URLs and a few other things like CSS dynamic expressions, htmLawed currently does not check every CSS style property. It does provide ways for the code-developer implementing htmLawed to do such checks through htmLawed's $spec argument, and through the hook_tag parameter (see section 3.4.8 for more). Disallowing style completely and relying on CSS classes and stylesheet files is recommended.
    -
    -  htmLawed does not check or correct the character encoding of the input it receives. In conjunction with permitting circumstances such as when the character encoding is left undefined through HTTP headers or HTML meta tags, this can permit an exploit (like Google's UTF-7/XSS vulnerability of the past).
    - -
    -

    -2.6  Use without modifying old kses() code -

    (to top)
    -
    -  The Kses PHP script is used by many applications (like WordPress). It is possible to have such applications use htmLawed instead, since it is compatible with code that calls the kses() function declared in the Kses file (usually named kses.php). E.g., application code like this will continue to work after replacing Kses with htmLawed:
    -
    - -    $comment_filtered = kses($comment_input, array('a'=>array(), 'b'=>array(), 'i'=>array())); -
    -
    -  For some of the $config parameters, htmLawed will use values other than the default ones. These are indicated by ^ in section 2.2. To force htmLawed to use other values, function kses() in the htmLawed code should be edited -- a few configurable parameters/variables need to be changed.
    -
    -  If the application uses a Kses file that has the kses() function declared, then, to have the application use htmLawed instead of Kses, simply rename htmLawed.php (to kses.php, e.g.) and replace the Kses file (or just replace the code in the Kses file with the htmLawed code). If the kses() function in the Kses file had been renamed by the application developer (e.g., in WordPress, it is named wp_kses()), then appropriately rename the kses() function in the htmLawed code.
    -
    -  If the Kses file used by the application has been highly altered by the application developers, then one may need a different approach. E.g., with WordPress, it is best to copy the htmLawed code to wp_includes/kses.php, rename the newly added function kses() to wp_kses(), and delete the code for the original wp_kses() function.
    -
    -  If the Kses code has a non-empty hook function (e.g., wp_kses_hook() in case of WordPress), then the code for htmLawed's kses_hook() function should be appropriately edited. However, the requirement of the hook function should be re-evaluated considering that htmLawed has extra capabilities. With WordPress, the hook function is an essential one. The following code is suggested for the htmLawed kses_hook() in case of WordPress:
    -
    - -    function kses_hook($string, &$cf, &$spec){ -
    - -    // kses compatibility -
    - -    $allowed_html = $spec; -
    - -    $allowed_protocols = array(); -
    - -    foreach($cf['schemes'] as $v){ -
    - -     foreach($v as $k2=>$v2){ -
    - -      if(!in_array($k2, $allowed_protocols)){ -
    - -       $allowed_protocols[] = $k2; -
    - -      } -
    - -     } -
    - -    } -
    - -    return wp_kses_hook($string, $allowed_html, $allowed_protocols); -
    - -    // eof -
    - -    } -
    - -
    -

    -2.7  Tolerance for ill-written HTML -

    (to top)
    -
    -  htmLawed can work with ill-written HTML code in the input. However, HTML that is too ill-written may not be read as HTML, and be considered mere plain text instead. Following statements indicate the degree of looseness that htmLawed can work with, and can be provided in instructions to writers:
    -
    -  *  Tags must be flanked by < and > with no > inside -- any needed > should be put in as &gt;. It is possible for tag content (element name and attributes) to be spread over many lines instead of being on one. A space may be present between the tag content and >, like <div > and <img / >, but not after the <.
    -
    -  *  Element and attribute names need not be lower-cased.
    -
    -  *  Attribute string of elements may be liberally spaced with tabs, line-breaks, etc.
    -
    -  *  Attribute values may not be double-quoted, or may be single-quoted.
    -
    -  *  Left-padding of numeric entities (like, &#0160;, &#x07ff;) with 0 is okay as long as the number of characters between the & and the ; does not exceed 8. All entities must end with ; though.
    -
    -  *  Named character entities must be properly cased. E.g., &Lt; or &TILDE; will not be let through without modification.
    -
    -  *  HTML comments should not be inside element tags (okay between tags), and should begin with <!-- and end with -->. Characters like <, >, and & may be allowed inside depending on $config, but any --> inside should be put in as --&gt;. Any -- inside will be automatically converted to -, and a space will be added before the comment delimiter -->.
    -
    -  *  CDATA sections should not be inside element tags, and can be in element content only if plain text is allowed for that element. They should begin with <![CDATA[ and end with ]]>. Characters like <, >, and & may be allowed inside depending on $config, but any ]]> inside should be put in as ]]&gt;.
    -
    -  *  For attribute values, character entities &lt;, &gt; and &amp; should be used instead of characters < and >, and & (when & is not part of a character entity). This applies even for Javascript code in values of attributes like onclick.
    -
    -  *  Characters <, >, & and " that are part of actual Javascript, etc., code in script elements should be used as such and not be put in as entities like &gt;. Otherwise, though the HTML will be valid, the code may fail to work. Further, if such characters have to be used, then they should be put inside CDATA sections.
    -
    -  *  Simple instructions like "an opening tag cannot be present between two closing tags" and "nested elements should be closed in the reverse order of how they were opened" can help authors write balanced HTML. If tags are imbalanced, htmLawed will try to balance them, but in the process, depending on $config["keep_bad"], some code/text may be lost.
    -
    -  *  Input authors should be notified of admin-specified allowed elements, attributes, configuration values (like conversion of named entities to numeric ones), etc.
    -
    -  *  With $config["unique_ids"] not 0 and the id attribute being permitted, writers should carefully avoid using duplicate or invalid id values as even though htmLawed will correct/remove the values, the final output may not be the one desired. E.g., when <a id="home"></a><input id="home" /><label for="home"></label> is processed into
    -<a id="home"></a><input id="prefix_home" /><label for="home"></label>.
    -
    -  *  Note that even if intended HTML is lost in a highly ill-written input, the processed output will be more secure and standard-compliant.
    -
    -  *  For URLs, unless $config["schemes"] is appropriately set, writers should avoid using escape characters or entities in schemes. E.g., htt&#112; (which many browsers will read as the harmless http) may be considered bad by htmLawed.
    -
    -  *  htmLawed will attempt to put plain text present directly inside blockquote, form, map and noscript elements (illegal as per the specs) inside auto-generated div elements.
    - -
    -

    -2.8  Limitations & work-arounds -

    (to top)
    -
    -  htmLawed's main objective is to make the input text more standard-compliant, secure for web-page readers, and free of HTML elements and attributes considered undesirable by the administrator. Some of its current limitations, regardless of this objective, are noted below along with work-arounds.
    -
    -  It should be borne in mind that no browser application is 100% standard-compliant, and that some of the standard specs (like asking for normalization of white-spacing within textarea elements) are clearly wrong. Regarding security, note that unsafe HTML code is not necessarily legally invalid.
    -
    -  *  htmLawed is meant for input that goes into the body of HTML documents. HTML's head-level elements are not supported, nor are the frameset elements frameset, frame and noframes.
    -
    -  *  It cannot transform the non-standard embed elements to the standard-compliant object elements. Yet, it can allow embed elements if permitted (embed is widely used and supported). Admins can certainly use the hook_tag parameter (section 3.4.9) to deploy a custom embed-to-object converter function.
    -
    -  *  The only non-standard element that may be permitted is embed; others like noembed and nobr cannot be permitted without modifying the htmLawed code.
    -
    -  *  It cannot handle input that has non-HTML code like SVG and MathML. One way around is to break the input into pieces and pass only those without non-HTML code to htmLawed. Another is described in section 3.9. A third way may be to somehow take advantage of the $config["and_mark"] parameter (see section 3.2).
    -
    -  *  By default, htmLawed won't check many attribute values for standard compliance. E.g., width="20m" with the dimension in non-standard m is let through. Implementing universal and strict attribute value checks can make htmLawed slow and resource-intensive. Admins should look at the hook_tag parameter (section 3.4.9) or $spec to enforce finer checks.
    -
    -  *  The attributes, deprecated (which can be transformed too) or not, that it supports are largely those that are in the specs. Only a few of the proprietary attributes are supported.
    -
    -  *  Except for contained URLs and dynamic expressions (also optional), htmLawed does not check CSS style property values. Admins should look at using the hook_tag parameter (section 3.4.9) or $spec for finer checks. Perhaps the best option is to disallow style but allow class attributes with the right oneof or match values for class, and have the various class style properties in .css CSS stylesheet files.
    -
    -  *  htmLawed does not parse emoticons, decode BBcode, or wikify, auto-converting text to proper HTML. Similarly, it won't convert line-breaks to br elements. Such functions are beyond its purview. Admins should use other code to pre- or post-process the input for such purposes.
    -
    -  *  htmLawed cannot be used to have links force-opened in new windows (by auto-adding appropriate target and onclick attributes to a). Admins should look at Javascript-based DOM-modifying solutions for this. Admins may also be able to use a custom hook function to enforce such checks (hook_tag parameter; see section 3.4.9).
    -
    -  *  Nesting-based checks are not possible. E.g., one cannot disallow p elements specifically inside td while permitting it elsewhere. Admins may be able to use a custom hook function to enforce such checks (hook_tag parameter; see section 3.4.9).
    -
    -  *  Except for optionally converting absolute or relative URLs to the other type, htmLawed will not alter URLs (e.g., to change the value of query strings or to convert http to https). Having absolute URLs may be a standard-requirement, e.g., when HTML is embedded in email messages, whereas altering URLs for other purposes is beyond htmLawed's goals. Admins may be able to use a custom hook function to enforce such checks (hook_tag parameter; see section 3.4.9).
    -
    -  *  Pairs of opening and closing tags that do not enclose any content (like <em></em>) are not removed. This may be against the standard specs for certain elements (e.g., table). However, presence of such standard-incompliant code will not break the display or layout of content. Admins can also use simple regex-based code to filter out such code.
    -
    -  *  htmLawed does not check for certain element orderings described in the standard specs (e.g., in a table, tbody is allowed before tfoot). Admins may be able to use a custom hook function to enforce such checks (hook_tag parameter; see section 3.4.9).
    -
    -  *  htmLawed does not check the number of nested elements. E.g., it will allow two caption elements in a table element, illegal as per the specs. Admins may be able to use a custom hook function to enforce such checks (hook_tag parameter; see section 3.4.9).
    -
    -  *  htmLawed might convert certain entities to actual characters and remove backslashes and CSS comment-markers (/*) in style attribute values in order to detect malicious HTML like crafted IE-specific dynamic expressions like &#101;xpression.... If this is too harsh, admins can allow CSS expressions through htmLawed core but then use a custom function through the hook_tag parameter (section 3.4.9) to more specifically identify CSS expressions in the style attribute values. Also, using $config["style_pass"], it is possible to have htmLawed pass style attribute values without even looking at them (section 3.4.8).
    -
    -  *  htmLawed does not correct certain possible attribute-based security vulnerabilities (e.g., <a href="http://x%22+style=%22background-image:xss">x</a>). These arise when browsers mis-identify markup in escaped text, defeating the very purpose of escaping text (a bad browser will read the given example as <a href="http://x" style="background-image:xss">x</a>).
    -
    -  *  Because of poor Unicode support in PHP, htmLawed does not remove the high value HTML-invalid characters with multi-byte code-points. Such characters, however, are extremely unlikely to be in the input (see section 3.1).
    -
    -  *  htmLawed does not check or correct the character encoding of the input it receives. In conjunction with permitting circumstances such as when the character encoding is left undefined through HTTP headers or HTML meta tags, this can permit an exploit (like Google's UTF-7/XSS vulnerability of the past).
    -
    -  *  Like any script using PHP's PCRE regex functions, PHP setup-specific low PCRE limit values can cause htmLawed to at least partially fail with very long input texts.
    - -
    -

    -2.9  Examples of usage -

    (to top)
    -
    -  Safest, allowing only safe HTML markup --
    -
    - -    $config = array('safe'=>1); -
    - -    $out = htmLawed($in); -
    -
    -  Simplest, allowing all valid HTML markup except javascript: --
    -
    - -    $out = htmLawed($in); -
    -
    -  Allowing all valid HTML markup including javascript: --
    -
    - -    $config = array('schemes'=>'*:*'); -
    - -    $out = htmLawed($in, $config); -
    -
    -  Allowing only safe HTML and the elements a, em, and strong --
    -
    - -    $config = array('safe'=>1, 'elements'=>'a, em, strong'); -
    - -    $out = htmLawed($in, $config); -
    -
    -  Not allowing elements script and object --
    -
    - -    $config = array('elements'=>'* -script -object'); -
    - -    $out = htmLawed($in, $config); -
    -
    -  Not allowing attributes id and style --
    -
    - -    $config = array('deny_attribute'=>'id, style'); -
    - -    $out = htmLawed($in, $config); -
    -
    -  Permitting only attributes title and href --
    -
    - -    $config = array('deny_attribute'=>'* -title -href'); -
    - -    $out = htmLawed($in, $config); -
    -
    -  Remove bad/disallowed tags altogether instead of converting them to entities --
    -
    - -    $config = array('keep_bad'=>0); -
    - -    $out = htmLawed($in, $config); -
    -
    -  Allowing attribute title only in a and not allowing attributes id, style, or scriptable on* attributes like onclick --
    -
    - -    $config = array('deny_attribute'=>'title, id, style, on*'); -
    - -    $spec = 'a=title'; -
    - -    $out = htmLawed($in, $config, $spec); -
    -
    -  Some case-studies are presented below.
    -
    1. A blog administrator wants to allow only a, em, strike, strong and u in comments, but needs strike and u transformed to span for better XHTML 1-strict compliance, and, he wants the a links to be to http or https resources:
    -
    - -    $processed = htmLawed($in, array('elements'=>'a, em, strike, strong, u', 'make_tag_strict'=>1, 'safe'=>1, 'schemes'=>'*:http, https'), 'a=href'); -
    -
    2. An author uses a custom-made web application to load content on his web-site. He is the only one using that application and the content he generates has all types of HTML, including scripts. The web application uses htmLawed primarily as a tool to correct errors that creep in while writing HTML and to take care of the occasional bad characters in copy-paste text introduced by Microsoft Office. The web application provides a preview before submitted input is added to the content. For the previewing process, htmLawed is set up as follows:
    -
    - -    $processed = htmLawed($in, array('css_expression'=>1, 'keep_bad'=>1, 'make_tag_strict'=>1, 'schemes'=>'*:*', 'valid_xhtml'=>1)); -
    -
    -  For the final submission process, keep_bad is set to 6. A value of 1 for the preview process allows the author to note and correct any HTML mistake without losing any of the typed text.
    -
    3. A data-miner is scraping information in a specific table of similar web-pages and is collating the data rows, and uses htmLawed to reduce unnecessary markup and white-spaces:
    -
    - -    $processed = htmLawed($in, array('elements'=>'tr, td', 'tidy'=>-1), 'tr, td ='); -
    - -
    -
    -

    -3  Details -

    (to top)
    -

    -3.1  Invalid/dangerous characters -

    (to top)
    -
    -  Valid characters (more correctly, their code-points) in HTML or XML are, hexadecimally, 9, a, d, 20 to d7ff, and e000 to 10ffff, except fffe and ffff (decimally, 9, 10, 13, 32 to 55295, and 57344 to 1114111, except 65534 and 65535). htmLawed removes the invalid characters 0 to 8, b, c, and e to 1f.
    -
    -  Because of PHP's poor native support for multi-byte characters, htmLawed cannot check for the remaining invalid code-points. However, for various reasons, it is very unlikely for any of those characters to be in the input.
    -
    -  Characters that are discouraged (see section 5.1) but not invalid are not removed by htmLawed.
    -
    -  It (function hl_tag()) also replaces the potentially dangerous (in some Mozilla [Firefox] and Opera browsers) soft-hyphen character (code-point, hexadecimally, ad, or decimally, 173) in attribute values with spaces. Where required, the characters <, >, &, and " are converted to entities.
    -
    -  With $config["clean_ms_char"] set as 1 or 2, many of the discouraged characters (decimal code-points 127 to 159 except 133) that many Microsoft applications incorrectly use (as per the Windows 1252 [Cp-1252] or a similar encoding system), and the character for decimal code-point 133, are converted to appropriate decimal numerical entities (or removed for a few cases)-- see appendix in section 5.4. This can help avoid some display issues arising from copying-pasting of content.
    -
    -  With $config["clean_ms_char"] set as 2, characters for the hexadecimal code-points 82, 91, and 92 (for special single-quotes), and 84, 93, and 94 (for special double-quotes) are converted to ordinary single and double quotes respectively and not to entities.
    -
    -  The character values are replaced with entities/characters and not character values referred to by the entities/characters to keep this task independent of the character-encoding of input text.
    -
    -  The $config["clean_ms_char"] parameter should not be used if authors do not copy-paste Microsoft-created text, or if the input text is not believed to use the Windows 1252 (Cp-1252) or a similar encoding like Cp-1251. Further, the input form and the web-pages displaying it or its content should have the character encoding appropriately marked-up.
    - -
    -

    -3.2  Character references/entities -

    (to top)
    -
    -  Valid character entities take the form &*; where * is #x followed by a hexadecimal number (hexadecimal numeric entity; like &#xA0; for non-breaking space), or alphanumeric like gt (external or named entity; like &nbsp; for non-breaking space), or # followed by a number (decimal numeric entity; like &#160; for non-breaking space). Character entities referring to the soft-hyphen character (the &shy; or \xad character; hexadecimal code-point ad [decimal 173]) in URL-accepting attribute values are always replaced with spaces; soft-hyphens in attribute values introduce vulnerabilities in some older versions of the Opera and Mozilla [Firefox] browsers.
    -
    -  htmLawed (function hl_ent()):
    -
    -  *  Neutralizes entities with multiple leading zeroes or missing semi-colons (potentially dangerous)
    -
    -  *  Lowercases the X (for XML-compliance) and A-F of hexadecimal numeric entities
    -
    -  *  Neutralizes entities referring to characters that are HTML-invalid (see section 3.1)
    -
    -  *  Neutralizes entities referring to characters that are HTML-discouraged (code-points, hexadecimally, 7f to 84, 86 to 9f, and fdd0 to fddf, or decimally, 127 to 132, 134 to 159, and 64976 to 64991). Entities referring to the remaining discouraged characters (see section 5.1 for a full list) are let through.
    -
    -  *  Neutralizes named entities that are not in the specs.
    -
    -  *  Optionally converts valid HTML-specific named entities except &gt;, &lt;, &quot;, and &amp; to decimal numeric ones (hexadecimal if $config["hexdec_entity"] is 2) for generic XML-compliance. For this, $config["named_entity"] should be 1.
    -
    -  *  Optionally converts hexadecimal numeric entities to the more widely supported decimal ones. For this, $config["hexdec_entity"] should be 0.
    -
    -  *  Optionally converts decimal numeric entities to the hexadecimal ones. For this, $config["hexdec_entity"] should be 2.
    -
    Neutralization refers to the entitification of & to &amp;.
    -
    Note: htmLawed does not convert entities to the actual characters represented by them; one can pass the htmLawed output through PHP's html_entity_decode function for that.
    -
    Note: If $config["and_mark"] is set, and set to a value other than 0, then the & characters in the original input are replaced with the control character for the hexadecimal code-point 6 (\x06; & characters introduced by htmLawed, e.g., after converting < to &lt;, are not affected). This allows one to distinguish, say, an &gt; introduced by htmLawed and an &gt; put in by the input writer, and can be helpful in further processing of the htmLawed-processed text (e.g., to identify the character sequence o(><)o to generate an emoticon image). When this feature is active, admins should ensure that the htmLawed output is not directly used in web pages or XML documents as the presence of the \x06 can break documents. Before use in such documents, and preferably before any storage, any remaining \x06 should be changed back to &, e.g., with:
    -
    - -    $final = str_replace("\x06", '&', $prelim); -
    -
    -  Also, see section 3.9.
    - -
    -

    -3.3  HTML elements -

    (to top)
    -
    -  htmLawed can be configured to allow only certain HTML elements (tags) in the input. Disallowed elements (just tag-content, and not element-content), based on $config["keep_bad"], are either neutralized (converted to plain text by entitification of < and >) or removed.
    -
    -  E.g., with only em permitted:
    -
    -  Input:
    -
    - -      <em>My</em> website is <a href="http://a.com>a.com</a>. -
    -
    -  Output, with $config["keep_bad"] = 0:
    -
    - -      <em>My</em> website is a.com. -
    -
    -  Output, with $config["keep_bad"] not 0:
    -
    - -      <em>My</em> website is &lt;a href=""&gt;a.com&lt;/a&gt;. -
    -
    -  See section 3.3.3 for differences between the various non-zero $config["keep_bad"] values.
    -
    -  htmLawed by default permits these 86 elements:
    -
    - -    a, abbr, acronym, address, applet, area, b, bdo, big, blockquote, br, button, caption, center, cite, code, col, colgroup, dd, del, dfn, dir, div, dl, dt, em, embed, fieldset, font, form, h1, h2, h3, h4, h5, h6, hr, i, iframe, img, input, ins, isindex, kbd, label, legend, li, map, menu, noscript, object, ol, optgroup, option, p, param, pre, q, rb, rbc, rp, rt, rtc, ruby, s, samp, script, select, small, span, strike, strong, sub, sup, table, tbody, td, textarea, tfoot, th, thead, tr, tt, u, ul, var -
    -
    -  Except for embed (included because of its wide-spread use) and the Ruby elements (rb, rbc, rp, rt, rtc, ruby; part of XHTML 1.1), these are all the elements in the HTML 4/XHTML 1 specs. Strict-specific specs. exclude center, dir, font, isindex, menu, s, strike, and u.
    -
    -  With $config["safe"] = 1, the default set will exclude applet, embed, iframe, object and script; see section 3.6.
    -
    -  When $config["elements"], which specifies allowed elements, is properly defined, and neither empty nor set to 0 or *, the default set is not used. To have elements added to or removed from the default set, a +/- notation is used. E.g., *-script-object implies that only script and object are disallowed, whereas *+noembed means that noembed is also allowed. Elements can also be specified as comma separated names. E.g., a, b, i means only a, b and i are permitted. In this notation, *, + and - have no significance and can actually cause a mis-reading.
    -
    -  Some more examples of $config["elements"] values indicating permitted elements (note that empty spaces are liberally allowed for clarity):
    -
    -  *  a, blockquote, code, em, strong -- only a, blockquote, code, em, and strong
    -  *  *-script -- all excluding script
    -  *  * -center -dir -font -isindex -menu -s -strike -u -- only XHTML-Strict elements
    -  *  *+noembed-script -- all including noembed excluding script
    -
    -  Some mis-usages (and the resulting permitted elements) that can be avoided:
    -
    -  *  -* -- none; instead of htmLawed, one might just use, e.g., the htmlspecialchars() PHP function
    -  *  *, -script -- all except script; admin probably meant *-script
    -  *  -*, a, em, strong -- all; admin probably meant a, em, strong
    -  *  * -- all; admin need not have set elements
    -  *  *-form+form -- all; a + will always over-ride any -
    -  *  *, noembed -- only noembed; admin probably meant *+noembed
    -  *  a, +b, i -- only a and i; admin probably meant a, b, i
    -
    -  Basically, when using the +/- notation, commas (,) should not be used, and vice versa, and * should be used with the former but not the latter.
    -
    Note: Even if an element that is not in the default set is allowed through $config["elements"], like noembed in the last example, it will eventually be removed during tag balancing unless such balancing is turned off ($config["balance"] set to 0). Currently, the only way around this, which actually is simple, is to edit the various arrays in the function hl_bal() to accommodate the element and its nesting properties.
    -
    A possible second way to specify allowed elements is to set $config["parent"] to an element name that supposedly will hold the input, and to set $config["balance"] to 1. During tag balancing (see section 3.3.3), all elements that cannot legally nest inside the parent element will be removed. The parent element is auto-reset to div if $config["parent"] is empty, body, or an element not in htmLawed's default set of 86 elements.
    -
    Tag transformation is possible for improving XHTML-Strict compliance -- most of the deprecated elements are removed or converted to valid XHTML-Strict ones; see section 3.3.2.
    - -

    -3.3.1  Handling of comments and CDATA sections -

    (to top)
    -
    CDATA sections have the format <![CDATA[...anything but not "]]>"...]]>, and HTML comments, <!--...anything but not "-->"... -->. Neither HTML comments nor CDATA sections can reside inside tags. HTML comments can exist anywhere else, but CDATA sections can exist only where plain text is allowed (e.g., immediately inside td element content but not immediately inside tr element content).
    -
    -  htmLawed (function hl_cmtcd()) handles HTML comments or CDATA sections depending on the values of $config["comment"] or $config["cdata"]. If 0, such markup is not looked for and the text is processed like plain text. If 1, it is removed completely. If 2, it is preserved but any <, > and & inside are changed to entities. If 3, they are left as such.
    -
    -  Note that for the last two cases, HTML comments and CDATA sections will always be removed from tag content (function hl_tag()).
    -
    -  Examples:
    -
    -  Input:
    - -    <!-- home link --><a href="home.htm"><![CDATA[x=&y]]>Home</a> -
    -  Output ($config["comment"] = 0, $config["cdata"] = 2):
    - -    &lt;-- home link --&gt;<a href="home.htm"><![CDATA[x=&amp;y]]>Home</a> -
    -  Output ($config["comment"] = 1, $config["cdata"] = 2):
    - -    <a href="home.htm"><![CDATA[x=&amp;y]]>Home</a> -
    -  Output ($config["comment"] = 2, $config["cdata"] = 2):
    - -    <!-- home link --><a href="home.htm"><![CDATA[x=&amp;y]]>Home</a> -
    -  Output ($config["comment"] = 2, $config["cdata"] = 1):
    - -    <!-- home link --><a href="home.htm">Home</a> -
    -  Output ($config["comment"] = 3, $config["cdata"] = 3):
    - -    <!-- home link --><a href="home.htm"><![CDATA[x=&y]]>Home</a> -
    -
    -  For standard-compliance, comments are given the form <!--comment -->, and any -- in the content is made -.
    -
    -  When $config["safe"] = 1, CDATA sections and comments are considered plain text unless $config["comment"] or $config["cdata"] is explicitly specified; see section 3.6.
    - -
    -

    -3.3.2  Tag-transformation for better XHTML-Strict -

    (to top)
    -
    -  If $config["make_tag_strict"] is set and not 0, following non-XHTML-Strict elements (and attributes), even if admin-permitted, are mutated as indicated (element content remains intact; function hl_tag2()):
    -
    -  *  applet - (based on $config["make_tag_strict"], unchanged (1) or removed (2))
    -  *  center - div style="text-align: center;"
    -  *  dir - ul
    -  *  embed - (based on $config["make_tag_strict"], unchanged (1) or removed (2))
    -  *  font (face, size, color) -    span style="font-family: ; font-size: ; color: ;" (size transformation reference)
    -  *  isindex - (based on $config["make_tag_strict"], unchanged (1) or removed (2))
    -  *  menu - ul
    -  *  s - span style="text-decoration: line-through;"
    -  *  strike - span style="text-decoration: line-through;"
    -  *  u - span style="text-decoration: underline;"
    -
    -  For an element with a pre-existing style attribute value, the extra style properties are appended.
    -
    -  Example input:
    -
    - -    <center> -
    - -     The PHP <s>software</s> script used for this <strike>web-page</strike> web-page is <font style="font-weight: bold " face=arial size='+3' color   =  "red  ">htmLawedTest.php</font>, from <u style= 'color:green'>PHP Labware</u>. -
    - -    </center> -
    -
    -  The output:
    -
    - -    <div style="text-align: center;"> -
    - -     The PHP <span style="text-decoration: line-through;">software</span> script used for this <span style="text-decoration: line-through;">web-page</span> web-page is <span style="font-weight: bold; font-family: arial; color: red; font-size: 200%;">htmLawedTest.php</span>, from <span style="color:green; text-decoration: underline;">PHP Labware</span>. -
    - -    </div> -
    - -
    -

    -3.3.3  Tag balancing and proper nesting -

    (to top)
    -
    -  If $config["balance"] is set to 1, htmLawed (function hl_bal()) checks and corrects the input to have properly balanced tags and legal element content (i.e., any element nesting should be valid, and plain text may be present only in the content of elements that allow them).
    -
    -  Depending on the value of $config["keep_bad"] (see section 2.2 and section 3.3), illegal content may be removed or neutralized to plain text by converting < and > to entities:
    -
    0 - remove; this option is available only to maintain Kses-compatibility and should not be used otherwise (see section 2.6)
    1 - neutralize tags and keep element content
    2 - remove tags but keep element content
    3 and 4 - like 1 and 2, but keep element content only if text (pcdata) is valid in parent element as per specs
    5 and 6 -  like 3 and 4, but line-breaks, tabs and spaces are left
    -
    -  Example input (disallowing the p element):
    -
    - -    <*> Pseudo-tags <*> -
    - -    <xml>Non-HTML tag xml</xml> -
    - -    <p> -
    - -    Disallowed tag p -
    - -    </p> -
    - -    <ul>Bad<li>OK</li></ul> -
    -
    -  The output with $config["keep_bad"] = 1:
    -
    - -    &lt;*&gt; Pseudo-tags &lt;*&gt; -
    - -    &lt;xml&gt;Non-HTML tag xml&lt;/xml&gt; -
    - -    &lt;p&gt; -
    - -    Disallowed tag p -
    - -    &lt;/p&gt; -
    - -    <ul>Bad<li>OK</li></ul> -
    -
    -  The output with $config["keep_bad"] = 3:
    -
    - -    &lt;*&gt; Pseudo-tags &lt;*&gt; -
    - -    &lt;xml&gt;Non-HTML tag xml&lt;/xml&gt; -
    - -    &lt;p&gt; -
    - -    Disallowed tag p -
    - -    &lt;/p&gt; -
    - -    <ul><li>OK</li></ul> -
    -
    -  The output with $config["keep_bad"] = 6:
    -
    - -    &lt;*&gt; Pseudo-tags &lt;*&gt; -
    - -    Non-HTML tag xml -
    -
    - -    Disallowed tag p -
    -
    - -    <ul><li>OK</li></ul> -
    -
    -  An option like 1 is useful, e.g., when a writer previews his submission, whereas one like 3 is useful before content is finalized and made available to all.
    -
    Note: In the example above, unlike <*>, <xml> gets considered as a tag (even though there is no HTML element named xml). In general, text matching the regular expression pattern <(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?> is considered a tag (phrase enclosed by the angled brackets < and >, and starting [with an optional slash preceding] with an alphanumeric word that starts with an alphabet...).
    -
    -  Nesting/content rules for each of the 86 elements in htmLawed's default set (see section 3.3) are defined in function hl_bal(). This means that if a non-standard element besides embed is being permitted through $config["elements"], the element's tag content will end up getting removed if $config["balance"] is set to 1.
    -
    -  Plain text and/or certain elements nested inside blockquote, form, map and noscript need to be in block-level elements. This point is often missed during manual writing of HTML code. htmLawed attempts to address this during balancing. E.g., if the parent container is set as form, the input B:<input type="text" value="b" />C:<input type="text" value="c" /> is converted to <div>B:<input type="text" value="b" />C:<input type="text" value="c" /></div>.
    - -
    -

    -3.3.4  Elements requiring child elements -

    (to top)
    -
    -  As per specs, the following elements require legal child elements nested inside them:
    -
    - -    blockquote, dir, dl, form, map, menu, noscript, ol, optgroup, rbc, rtc, ruby, select, table, tbody, tfoot, thead, tr, ul -
    -
    -  In some cases, the specs stipulate the number and/or the ordering of the child elements. A table can have 0 or 1 caption, tbody, tfoot, and thead, but they must be in this order: caption, thead, tfoot, tbody.
    -
    -  htmLawed currently does not check for conformance to these rules. Note that any non-compliance in this regard will not introduce security vulnerabilities, crash browser applications, or affect the rendering of web-pages.
    -
    -  With $config["direct_list_nest"] set to 1, htmLawed will allow direct nesting of an ol or ul list within another ol or ul without requiring the child list to be within an li of the parent list. While this is not standard-compliant, directly nested lists are rendered properly by almost all browsers. The parameter $config["direct_list_nest"] has no effect if tag-balancing (section 3.3.3) is turned off.
    - -
    -

    -3.3.5  Beautify or compact HTML -

    (to top)
    -
    -  By default, htmLawed will neither beautify HTML code by formatting it with indentations, etc., nor will it make it compact by removing un-needed white-space. (It does always properly white-space tag content.)
    -
    -  As per the HTML standards, spaces, tabs and line-breaks in web-pages (except those inside pre elements) are all considered equivalent, and referred to as white-spaces. Browser applications are supposed to consider contiguous white-spaces as just a single space, and to disregard white-spaces trailing opening tags or preceding closing tags. This white-space normalization allows the use of text/code beautifully formatted with indentations and line-spacings for readability. Such pretty HTML can, however, increase the size of web-pages, or make the extraction or scraping of plain text cumbersome.
    -
    -  With the $config parameter tidy, htmLawed can be used to beautify or compact the input text. Input with just plain text and no HTML markup is also subject to this. Besides pre, the script and textarea elements, CDATA sections, and HTML comments are not subjected to the tidying process.
    -
    -  To compact, use $config["tidy"] = -1; single instances or runs of white-spaces are replaced with a single space, and white-spaces trailing and leading open and closing tags, respectively, are removed.
    -
    -  To beautify, $config["tidy"] is set as 1, or for customized tidying, as a string like 2s2n. The s or t character specifies the use of spaces or tabs for indentation. The first and third characters, any of the digits 0-9, specify the number of spaces or tabs per indentation, and any parental lead spacing (extra indenting of the whole block of input text). The r and n characters are used to specify line-break characters: n for \n (Unix/Mac OS X line-breaks), rn or nr for \r\n (Windows/DOS line-breaks), or r for \r.
    -
    -  The $config["tidy"] value of 1 is equivalent to 2s0n. Other $config["tidy"] values are read loosely: a value of 4 is equivalent to 4s0n; t2, to 1t2n; s, to 2s0n; 2TR, to 2t0r; T1, to 1t1n; nr3, to 3s0nr, and so on. Except in the indentations and line-spacings, runs of white-spaces are replaced with a single space during beautification.
    -
    -  Input formatting using $config["tidy"] is not recommended when input text has mixed markup (like HTML + PHP).
    - -
    -
    -

    -3.4  Attributes -

    (to top)
    -
    -  htmLawed will only permit attributes described in the HTML specs (including deprecated ones). It also permits some attributes for use with the embed element (the non-standard embed element is supported in htmLawed because of its widespread use), and the xml:space attribute (valid only in XHTML 1.1). A list of such 111 attributes and the elements they are allowed in is in section 5.2.
    -
    -  When $config["deny_attribute"] is not set, or set to 0, or empty (""), all the 111 attributes are permitted. Otherwise, $config["deny_attribute"] can be set as a list of comma-separated names of the denied attributes. on* can be used to refer to the group of potentially dangerous, script-accepting attributes: onblur, onchange, onclick, ondblclick, onfocus, onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, onreset, onselect and onsubmit.
    -
    -  Note that attributes specified in $config["deny_attribute"] are denied globally, for all elements. To deny attributes for only specific elements, $spec (see section 2.3) can be used. $spec can also be used to element-specifically permit an attribute otherwise denied through $config["deny_attribute"].
    -
    -  With $config["safe"] = 1 (section 3.6), the on* attributes are automatically disallowed.
    -
    Note: To deny all but a few attributes globally, a simpler way to specify $config["deny_attribute"] would be to use the notation * -attribute1 -attribute2 .... Thus, a value of * -title -href implies that except href and title (where allowed as per standards) all other attributes are to be removed. With this notation, the value for the parameter safe (section 3.6) will have no effect on deny_attribute.
    -
    -  htmLawed (function hl_tag()) also:
    -
    -  *  Lower-cases attribute names
    -  *  Removes duplicate attributes (last one stays)
    -  *  Gives attributes the form name="value" and single-spaces them, removing unnecessary white-spacing
    -  *  Provides required attributes (see section 3.4.1)
    -  *  Double-quotes values and escapes any " inside them
    -  *  Replaces the possibly dangerous soft-hyphen characters (hexadecimal code-point ad) in the values with spaces
    -  *  Allows custom function to additionally filter/modify attribute values (see section 3.4.9)
    - -

    -3.4.1  Auto-addition of XHTML-required attributes -

    (to top)
    -
    -  If indicated attributes for the following elements are found missing, htmLawed (function hl_tag()) will add them (with values same as attribute names unless indicated otherwise below):
    -
    -  *  area - alt (area)
    -  *  area, img - src, alt (image)
    -  *  bdo - dir (ltr)
    -  *  form - action
    -  *  map - name
    -  *  optgroup - label
    -  *  param - name
    -  *  script - type (text/javascript)
    -  *  textarea - rows (10), cols (50)
    -
    -  Additionally, with $config["xml:lang"] set to 1 or 2, if the lang but not the xml:lang attribute is declared, then the latter is added too, with a value copied from that of lang. This is for better standard-compliance. With $config["xml:lang"] set to 2, the lang attribute is removed (XHTML 1.1 specs).
    -
    -  Note that the name attribute for map, invalid in XHTML 1.1, is also transformed if required -- see section 3.4.6.
    - -
    -

    -3.4.2  Duplicate/invalid id values -

    (to top)
    -
    -  If $config["unique_ids"] is 1, htmLawed (function hl_tag()) removes id attributes with values that are not XHTML-compliant (must begin with a letter and can contain letters, digits, :, ., - and _) or duplicate. If $config["unique_ids"] is a word, any duplicate but otherwise valid value will be appropriately prefixed with the word to ensure its uniqueness. The word should begin with a letter and should contain only letters, numbers, :, ., _ and -.
    -
    -  Even if multiple inputs need to be filtered (through multiple calls to htmLawed), htmLawed ensures uniqueness of id values as it uses a global variable ($GLOBALS["hl_Ids"] array). Further, an admin can restrict the use of certain id values by presetting this variable before htmLawed is called into use. E.g.:
    -
    - -    $GLOBALS['hl_Ids'] = array('top'=>1, 'bottom'=>1, 'myform'=>1); // id values not allowed in input -
    - -    $processed = htmLawed($text); // filter input -
    - -
    -

    -3.4.3  URL schemes (protocols) and scripts in attribute values -

    (to top)
    -
    -  htmLawed edits attributes that take URLs as values if they are found to contain un-permitted schemes. E.g., if the afp scheme is not permitted, then <a href="afp://domain.org"> becomes <a href="denied:afp://domain.org">, and if Javascript is not permitted <a onclick="javascript:xss();"> becomes <a onclick="denied:javascript:xss();">.
    -
    -  By default htmLawed permits these schemes in URLs for the href attribute:
    -
    - -    aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet -
    -
    -  Also, only file, http and https are permitted in attributes whose names start with o (like onmouseover), and in these attributes that accept URLs:
    -
    - -    action, cite, classid, codebase, data, href, longdesc, model, pluginspage, pluginurl, src, style, usemap -
    -
    -  These default sets are used when $config["schemes"] is not set (see section 2.2). To over-ride the defaults, $config["schemes"] is defined as a string of semi-colon-separated sub-strings of type attribute: comma-separated schemes. E.g., href: mailto, http, https; onclick: javascript; src: http, https. For unspecified attributes, file, http and https are permitted. This can be changed by passing schemes for * in $config["schemes"]. E.g., href: mailto, http, https; *: http, https.
    -
    * can be put in the list of schemes to permit all protocols. E.g., style: *; img: http, https results in protocols not being checked in style attribute values. However, in such cases, any relative-to-absolute URL conversion, or vice versa, (section 3.4.4) is not done.
    -
    -  Thus, to allow Javascript, one can set $config["schemes"] as href: mailto, http, https; *: http, https, javascript, or href: mailto, http, https, javascript; *: http, https, javascript, or *: *, and so on.
    -
    -  As a side-note, one may find style: * useful as URLs in style attributes can be specified in a variety of ways, and the patterns that htmLawed uses to identify URLs may mistakenly identify non-URL text.
    -
    ! can be put in the list of schemes to disallow all protocols as well as local URLs. Thus, with href: http, style: !, '<a href="http://cnn.com" style="background-image: url('local.jpg');">CNN</a>' will become '<a href="http://cnn.com" style="background-image: url('denied:local.jpg');">CNN</a>'.
    -
    Note: If URL-accepting attributes other than those listed above are being allowed, then the scheme will not be checked unless the attribute name contains the string src (e.g., dynsrc) or starts with o (e.g., onbeforecopy).
    -
    -  With $config["safe"] = 1, all URLs are disallowed in the style attribute values.
    - -
    -

    -3.4.4  Absolute & relative URLs in attribute values -

    (to top)
    -
    -  htmLawed can make absolute URLs in attributes like href relative ($config["abs_url"] is -1), and vice versa ($config["abs_url"] is 1). URLs in scripts are not considered for this, and neither are URLs like #section_6 (fragment), ?name=Tim#show (starting with query string), and ;var=1?name=Tim#show (starting with parameters). Further, this requires that $config["base_url"] be set properly, with the :// and a trailing slash (/), with no query string, etc. E.g., file:///D:/page/, https://abc.com/x/y/, or http://localhost/demo/ are okay, but file:///D:/page/?help=1, abc.com/x/y/ and http://localhost/demo/index.htm are not.
    -
    -  For making absolute URLs relative, only those URLs that have the $config["base_url"] string at the beginning are converted. E.g., with $config["base_url"] = "https://abc.com/x/y/", https://abc.com/x/y/a.gif and https://abc.com/x/y/z/b.gif become a.gif and z/b.gif respectively, while https://abc.com/x/c.gif is not changed.
    -
    -  When making relative URLs absolute, only values for scheme, network location (host-name) and path values in the base URL are inherited. See section 5.5 for more about the URL specification as per RFC 1808.
    - -
    -

    -3.4.5  Lower-cased, standard attribute values -

    (to top)
    -
    -  Optionally, for standard-compliance, htmLawed (function hl_tag()) lower-cases standard attribute values to give, e.g., input type="password" instead of input type="Password", if $config["lc_std_val"] is 1. Attribute values matching those listed below for any of the elements (plus those for the type attribute of button or input) are lower-cased:
    -
    - -    all, baseline, bottom, button, center, char, checkbox, circle, col, colgroup, cols, data, default, file, get, groups, hidden, image, justify, left, ltr, middle, none, object, password, poly, post, preserve, radio, rect, ref, reset, right, row, rowgroup, rows, rtl, submit, text, top -
    -
    - -    a, area, bdo, button, col, form, img, input, object, option, optgroup, param, script, select, table, td, tfoot, th, thead, tr, xml:space -
    -
    -  The following empty (minimized) attributes are always assigned lower-cased values (same as the names):
    -
    - -    checked, compact, declare, defer, disabled, ismap, multiple, nohref, noresize, noshade, nowrap, readonly, selected -
    - -
    -

    -3.4.6  Transformation of deprecated attributes -

    (to top)
    -
    -  If $config["no_deprecated_attr"] is not 0, then deprecated attributes (see appendix in section 5.2) are removed and, in most cases, their values are transformed to CSS style properties and added to the style attributes (function hl_tag()). Except for bordercolor for table, tr and td, the scores of proprietary attributes that were never part of any cross-browser standard are not supported.
    -
    Note: The attribute target for a is allowed even though it is not in XHTML 1.0 specs. This is because of the attribute's wide-spread use and browser-support, and because the attribute is valid in XHTML 1.1 onwards.
    -
    -  *  align - for img with value of left or right, becomes, e.g., float: left; for div and table with value center, becomes margin: auto; all others become, e.g., text-align: right
    -
    -  *  bgcolor - E.g., bgcolor="#ffffff" becomes background-color: #ffffff
    -  *  border - E.g., border="10" becomes border: 10px
    -  *  bordercolor - E.g., bordercolor=#999999 becomes border-color: #999999;
    -  *  compact - font-size: 85%
    -  *  clear - E.g., clear="all" becomes clear: both
    -
    -  *  height - E.g., height="10" becomes height: 10px and height="*" becomes height: auto
    -
    -  *  hspace - E.g., hspace="10" becomes margin-left: 10px; margin-right: 10px
    -  *  language - language="VBScript" becomes type="text/vbscript"
    -  *  name - E.g., name="xx" becomes id="xx"
    -  *  noshade - border-style: none; border: 0; background-color: gray; color: gray
    -  *  nowrap - white-space: nowrap
    -  *  size - E.g., size="10" becomes height: 10px
    -  *  start - removed
    -  *  type - E.g., type="i" becomes list-style-type: lower-roman
    -  *  value - removed
    -  *  vspace - E.g., vspace="10" becomes margin-top: 10px; margin-bottom: 10px
    -  *  width - like height
    -
    -  Example input:
    -
    - -    <img src="j.gif" alt="image" name="dad's" /><img src="k.gif" alt="image" id="dad_off" name="dad" /> -
    - -    <br clear="left" /> -
    - -    <hr noshade size="1" /> -
    - -    <img name="img" src="i.gif" align="left" alt="image" hspace="10" vspace="10" width="10em" height="20" border="1" style="padding:5px;" /> -
    - -    <table width="50em" align="center" bgcolor="red"> -
    - -     <tr> -
    - -      <td width="20%"> -
    - -       <div align="center"> -
    - -        <h3 align="right">Section</h3> -
    - -        <p align="right">Para</p> -
    - -        <ol type="a" start="e"><li value="x">First item</li></ol> -
    - -       </div> -
    - -      </td> -
    - -      <td width="*"> -
    - -       <ol type="1"><li>First item</li></ol> -
    - -      </td> -
    - -     </tr> -
    - -    </table> -
    - -    <br clear="all" /> -
    -
    -  And the output with $config["no_deprecated_attr"] = 1:
    -
    - -    <img src="j.gif" alt="image" /><img src="k.gif" alt="image" id="dad_off" /> -
    - -    <br style="clear: left;" /> -
    - -    <hr style="border-style: none; border: 0; background-color: gray; color: gray; size: 1px;" /> -
    - -    <img src="i.gif" alt="image" width="10em" height="20" style="padding:5px; float: left; margin-left: 10px; margin-right: 10px; margin-top: 10px; margin-bottom: 10px; border: 1px;" id="img" /> -
    - -    <table width="50em" style="margin: auto; background-color: red;"> -
    - -     <tr> -
    - -      <td style="width: 20%;"> -
    - -       <div style="margin: auto;"> -
    - -        <h3 style="text-align: right;">Section</h3> -
    - -        <p style="text-align: right;">Para</p> -
    - -        <ol style="list-style-type: lower-latin;"><li>First item</li></ol> -
    - -       </div> -
    - -      </td> -
    - -      <td style="width: auto;"> -
    - -       <ol style="list-style-type: decimal;"><li>First item</li></ol> -
    - -      </td> -
    - -     </tr> -
    - -    </table> -
    - -    <br style="clear: both;" /> -
    -
    -  For lang, deprecated in XHTML 1.1, transformation is taken care of through $config["xml:lang"]; see section 3.4.1.
    -
    -  The attribute name is deprecated in form, iframe, and img, and is replaced with id if an id attribute doesn't exist and if the name value is appropriate for id. For such replacements for a and map, for which the name attribute is deprecated in XHTML 1.1, $config["no_deprecated_attr"] should be set to 2 (when set to 1, for these two elements, the name attribute is retained).
    - -
    -

    -3.4.7  Anti-spam & href -

    (to top)
    -
    -  htmLawed (function hl_tag()) can check the href attribute values (link addresses) as an anti-spam (email or link spam) measure.
    -
    -  If $config["anti_mail_spam"] is not 0, the @ of email addresses in href values like mailto:a@b.com is replaced with text specified by $config["anti_mail_spam"]. The text should be of a form that makes it clear to others that the address needs to be edited before a mail is sent; e.g., <remove_this_antispam>@ (makes the example address a<remove_this_antispam>@b.com).
    -
    -  For regular links, one can choose to have a rel attribute with nofollow in its value (which tells some search engines to not follow a link). This can discourage link spammers. Additionally, or as an alternative, one can choose to empty the href value altogether (disable the link).
    -
    -  For use of these options, $config["anti_link_spam"] should be set as an array with values regex1 and regex2, both or one of which can be empty (like array("", "regex2")) to indicate that that option is not to be used. Otherwise, regex1 or regex2 should be PHP- and PCRE-compatible regular expression patterns: href values will be matched against them and those matching the pattern will accordingly be treated.
    -
    -  Note that the regular expressions should have delimiters, and be well-formed and preferably fast. Absolute efficiency/accuracy is often not needed.
    -
    -  An example, to have a rel attribute with nofollow for all links, and to disable links that do not point to domains abc.com and xyz.org:
    -
    - -    $config["anti_link_spam"] = array('`.`', '`://\W*(?!(abc\.com|xyz\.org))`'); -
    - -
    -

    -3.4.8  Inline style properties -

    (to top)
    -
    -  htmLawed can check URL schemes and dynamic expressions (to guard against Javascript, etc., script-based insecurities) in inline CSS style property values in the style attributes. (CSS properties like background-image that accept URLs in their values are noted in section 5.3.) Dynamic CSS expressions that allow scripting in the IE browser, and can be a vulnerability, can be removed from property values by setting $config["css_expression"] to 1 (default setting). Note that when $config["css_expression"] is set to 1, htmLawed will remove /* from the style values.
    -
    Note: Because of the various ways of representing characters in attribute values (URL-escapement, entitification, etc.), htmLawed might alter the values of the style attribute values, and may even falsely identify dynamic CSS expressions and URL schemes in them. If this is an important issue, checking of URLs and dynamic expressions can be turned off ($config["schemes"] = "...style:*...", see section 3.4.3, and $config["css_expression"] = 0). Alternately, admins can use their own custom function for finer handling of style values through the hook_tag parameter (see section 3.4.9).
    -
    -  It is also possible to have htmLawed let through any style value by setting $config["style_pass"] to 1.
    -
    -  As such, it is better to set up a CSS file with class declarations, disallow the style attribute, set a $spec rule (see section 2.3) for class for the oneof or match parameter, and ask writers to make use of the class attribute.
    - -
    -

    -3.4.9  Hook function for tag content -

    (to top)
    -
    -  It is possible to utilize a custom hook function to alter the tag content htmLawed has finalized (i.e., after it has checked/corrected for required attributes, transformed attributes, lower-cased attribute names, etc.).
    -
    -  When $config parameter hook_tag is set to the name of a function, htmLawed (function hl_tag()) will pass on the element name, and, in the case of an opening tag, the finalized attribute name-value pairs as array elements to the function. The function, after completing a task such as filtering or tag transformation, will typically return an empty string, the full opening tag string like <element_name attribute_1_name="attribute_1_value"...> (for empty elements like img and input, the element-closing slash / should also be included), etc.
    -
    -  Any hook_tag function, since htmLawed version 1.1.11, also receives names of elements in closing tags, such as a in the closing </a> tag of the element <a href="http://cnn.com">CNN</a>. Unlike for opening tags, no other value (i.e., the attribute name-value array) is passed to the function since a closing tag contains only element names. Typically, the function will return an empty string or a full closing tag (like </a>).
    -
    -  This is a powerful functionality that can be exploited for various objectives: consolidate-and-convert inline style attributes to class, convert embed elements to object, permit only one caption element in a table element, disallow embedding of certain types of media, inject HTML, use CSSTidy to sanitize style attribute values, etc.
    -
    -  As an example, the custom hook code below can be used to force a series of specifically ordered id attributes on all elements, and a specific param element inside all object elements:
    -
    - -    function my_tag_function($element, $attribute_array=0){ -
    -
    - -      // If second argument is not received, it means a closing tag is being handled -
    - -      if(is_numeric($attribute_array)){ -
    - -        return "</$element>"; -
    - -      } -
    -
    - -      static $id = 0; -
    - -      // Remove any duplicate element -
    - -      if($element == 'param' && isset($attribute_array['allowscriptaccess'])){ -
    - -        return ''; -
    - -      } -
    -
    - -      $new_element = ''; -
    -
    - -      // Force a serialized ID number -
    - -      $attribute_array['id'] = 'my_'. $id; -
    - -      ++$id; -
    -
    - -      // Inject param for allowscriptaccess -
    - -      if($element == 'object'){ -
    - -        $new_element = '<param id="my_'. $id. '" allowscriptaccess="never" />'; -
    - -        ++$id; -
    - -      } -
    -
    - -      $string = ''; -
    - -      foreach($attribute_array as $k=>$v){ -
    - -        $string .= " {$k}=\"{$v}\""; -
    - -      } -
    -
    - -      static $empty_elements = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); -
    -
    - -      return "<{$element}{$string}". (isset($empty_elements[$element]) ? ' /' : ''). '>'. $new_element; -
    - -    } -
    -
    -  The hook_tag parameter is different from the hook parameter (section 3.7).
    -
    -  Snippets of hook function code developed by others may be available on the htmLawed website.
    - -
    -
    -

    -3.5  Simple configuration directive for most valid XHTML -

    (to top)
    -
    -  If $config["valid_xhtml"] is set to 1, some relevant $config parameters (indicated by ~ in section 2.2) are auto-adjusted. This allows one to pass the $config argument with a simpler value. If a value for a parameter auto-set through valid_xhtml is still manually provided, then that value will over-ride the auto-set value.
    - -
    -

    -3.6  Simple configuration directive for most safe HTML -

    (to top)
    -
    Safe HTML refers to HTML that is restricted to reduce the vulnerability for scripting attacks (such as XSS) based on HTML code which otherwise may still be legal and compliant with the HTML standard specs. When elements such as script and object, and attributes such as onmouseover and style are allowed in the input text, an input writer can introduce malevolent HTML code. Note that what is considered safe depends on the nature of the web application and the trust-level accorded to its users.
    -
    -  htmLawed allows an admin to use $config["safe"] to auto-adjust multiple $config parameters (such as elements which declares the allowed element-set), which otherwise would have to be manually set. The relevant parameters are indicated by " in section 2.2. Thus, one can pass the $config argument with a simpler value.
    -
    -  With the value of 1, htmLawed considers CDATA sections and HTML comments as plain text, and prohibits the applet, embed, iframe, object and script elements, and the on* attributes like onclick. (There are $config parameters like css_expression that are not affected by the value set for safe but whose default values still contribute towards a more safe output.) Further, URLs with schemes (see section 3.4.3) are neutralized so that, e.g., style="moz-binding:url(http://danger)" becomes style="moz-binding:url(denied:http://danger)".
    -
    -  Admins, however, may still want to completely deny the style attribute, e.g., with code like
    -
    - -    $processed = htmLawed($text, array('safe'=>1, 'deny_attribute'=>'style')); -
    -
    -  Permitting the style attribute brings in risks of click-jacking, etc. CSS property values can render a page non-functional or be used to deface it. Except for URLs, dynamic expressions, and some other things, htmLawed does not completely check style values. It does provide ways for the code-developer implementing htmLawed to do such checks through the $spec argument, and through the hook_tag parameter (see section 3.4.8 for more). Disallowing style completely and relying on CSS classes and stylesheet files is recommended.
    -
    -  If a value for a parameter auto-set through safe is still manually provided, then that value can over-ride the auto-set value. E.g., with $config["safe"] = 1 and $config["elements"] = "*+script", script, but not applet, is allowed.
    -
    -  A page illustrating the efficacy of htmLawed's anti-XSS abilities with safe set to 1 against XSS vectors listed by RSnake may be available here.
    - -
    -

    -3.7  Using a hook function -

    (to top)
    -
    -  If $config["hook"] is not set to 0, then htmLawed will allow preliminarily processed input to be altered by a hook function named by $config["hook"] before starting the main work (but after handling of characters, entities, HTML comments and CDATA sections -- see code for function htmLawed()).
    -
    -  The hook function also allows one to alter the finalized values of $config and $spec.
    -
    -  Note that the hook parameter is different from the hook_tag parameter (section 3.4.9).
    -
    -  Snippets of hook function code developed by others may be available on the htmLawed website.
    - -
    -

    -3.8  Obtaining finalized parameter values -

    (to top)
    -
    -  htmLawed can assign the finalized $config and $spec values to a variable named by $config["show_setting"]. The variable, made global by htmLawed, is set as an array with three keys: config, with the $config value, spec, with the $spec value, and time, with a value that is the Unix time (the output of PHP's microtime() function) when the value was assigned. Admins should use a PHP-compliant variable name (e.g., one that does not begin with a numerical digit) that does not conflict with variable names in their non-htmLawed code.
    -
    -  The values, which are also post-hook function (if any), can be used to auto-generate information (on, e.g., the elements that are permitted) for input writers.
    - -
    -

    -3.9  Retaining non-HTML tags in input with mixed markup -

    (to top)
    -
    -  htmLawed does not remove certain characters that though valid are nevertheless discouraged in HTML documents as per the specs (see section 5.1). This can be utilized to deal with input that contains mixed markup. Input that may have HTML markup as well as some other markup that is based on the <, > and & characters is considered to have mixed markup. The non-HTML markup can be rather proprietary (like markup for emoticons/smileys), or standard (like MathML or SVG). Or it can be programming code meant for execution/evaluation (such as embedded PHP code).
    -
    -  To deal with such mixed markup, the input text can be pre-processed to hide the non-HTML markup by specifically replacing the <, > and & characters with some of the HTML-discouraged characters (see section 3.1.2). Post-htmLawed processing, the replacements are reverted.
    -
    -  An example (mixed HTML and PHP code in input text):
    -
    - -    $text = preg_replace('`<\?php(.+?)\?>`sm', "\x83?php\\1?\x84", $text); -
    - -    $processed = htmLawed($text); -
    - -    $processed = preg_replace('`\x83\?php(.+?)\?\x84`sm', '<?php$1?>', $processed); -
    -
    -  This code will not work if $config["clean_ms_char"] is set to 1 (section 3.1), in which case one should instead deploy a hook function (section 3.7). (htmLawed internally uses certain control characters, code-points 1 to 7, and use of these characters as markers in the logic of hook functions may cause issues.)
    -
    -  Admins may also be able to use $config["and_mark"] to deal with such mixed markup; see section 3.2.
    - -
    -
    -

    -4  Other -

    (to top)
    -

    -4.1  Support -

    (to top)
    -
    -  A careful re-reading of this documentation will very likely answer your questions.
    -
    -  Software updates and forum-based community-support may be found at http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed. For general PHP issues (not htmLawed-specific), support may be found through internet searches and at http://php.net.
    - -
    -

    -4.2  Known issues -

    (to top)
    -
    -  See section 2.8.
    -
    -  Readers are advised to cross-check information given in this document.
    - -
    -

    -4.3  Change-log -

    (to top)
    -
    -  (The release date for the downloadable package of files containing documentation, demo script, test-cases, etc., besides the htmLawed.php file may be updated independently if the secondary files are revised.)
    -
    Version number - Release date. Notes
    -
    -  1.1.11 - 5 June 2012. Fix for possible problem with handling of multi-byte characters in attribute values in an mbstring.func_overload environment. $config["hook_tag"], if specified, now receives names of elements in closing tags.
    -
    -  1.1.10 - 22 October 2011. Fix for a bug in the tidy functionality that caused the entire input to be replaced with a single space; new parameter, $config["direct_list_nest"] to allow direct descendance of a list in a list. (5 April 2012. Dual licensing from LGPLv3 to LGPLv3 and GPLv2+.)
    -
    -  1.1.9.5 - 6 July 2011. Minor correction of a rule for nesting of li within dir
    -
    -  1.1.9.4 - 3 July 2010. Parameter schemes now accepts ! so any URL, even a local one, can be denied. An issue in which a second URL value in style properties was not checked was fixed.
    -
    -  1.1.9.3 - 17 May 2010. Checks for correct nesting of param
    -
    -  1.1.9.2 - 26 April 2010. Minor fix regarding rendering of denied URL schemes
    -
    -  1.1.9.1 - 26 February 2010. htmLawed now uses the LGPL version 3 license; support for flashvars attribute for embed
    -
    -  1.1.9 - 22 December 2009. Soft-hyphens are now removed only from URL-accepting attribute values
    -
    -  1.1.8.1 - 16 July 2009. Minor code-change to fix a PHP error notice
    -
    -  1.1.8 - 23 April 2009. Parameter deny_attribute now accepts the wild-card *, making it simpler to specify its value when all but a few attributes are being denied; fixed a bug in interpreting $spec
    -
    -  1.1.7 - 11-12 March 2009. Attributes globally denied through deny_attribute can be allowed element-specifically through $spec; $config["style_pass"], which lets through any style value, introduced; altered logic to catch certain types of dynamic crafted CSS expressions
    -
    -  1.1.3-6 - 28-31 January - 4 February 2009. Altered logic to catch certain types of dynamic crafted CSS expressions
    -
    -  1.1.2 - 22 January 2009. Fixed bug in parsing of font attributes during tag transformation
    -
    -  1.1.1 - 27 September 2008. Better nesting correction when omissible closing tags are absent
    -
    -  1.1 - 29 June 2008. $config["hook_tag"] and $config["format"] introduced for custom tag/attribute check/modification/injection and output compaction/beautification; fixed a regex-in-$spec parsing bug
    -
    -  1.0.9 - 11 June 2008. Fixed bug in invalid HTML code-point entity check
    -
    -  1.0.8 - 15 May 2008. bordercolor attribute for table, td and tr
    -
    -  1.0.7 - 1 May 2008. Support for wmode attribute for embed; $config["show_setting"] introduced; improved $config["elements"] evaluation
    -
    -  1.0.6 - 20 April 2008. $config["and_mark"] introduced
    -
    -  1.0.5 - 12 March 2008. style URL schemes essentially disallowed when $config safe is on; improved regex for CSS expression search
    -
    -  1.0.4 - 10 March 2008. Improved corrections for blockquote, form, map and noscript
    -
    -  1.0.3 - 3 March 2008. Character entities for soft-hyphens are now replaced with spaces (instead of being removed); a bug allowing td directly inside table fixed; safe $config parameter added
    -
    -  1.0.2 - 13 February 2008. Improved implementation of $config["keep_bad"]
    -
    -  1.0.1 - 7 November 2007. Improved regex for identifying URLs, protocols and dynamic expressions (hl_tag() and hl_prot()); no error display with hl_regex()
    -
    -  1.0 - 2 November 2007. First release
    - -
    -

    -4.4  Testing -

    (to top)
    -
    -  To test htmLawed using a form interface, a demo web-page is provided with the htmLawed distribution (htmLawed.php and htmLawedTest.php should be in the same directory on the web-server). A file with test-cases is also provided.
    - -
    -

    -4.5  Upgrade, & old versions -

    (to top)
    -
    -  Upgrading is as simple as replacing the previous version of htmLawed.php (assuming it was not modified for customized features). As htmLawed output is almost always used in static documents, upgrading should not affect old, finalized content.
    -
    Important  The following upgrades may affect the functionality of a specific htmLawed as indicated by their corresponding notes:
    -
    -  (1) From version 1.1-1.1.10 to 1.1.11, if a hook_tag function is in use: In version 1.1.11, elements in closing tags (and not just the opening tags) are also passed to the function. There are no attribute names/values to pass, so a hook_tag function receives only the element name. The hook_tag function therefore may have to be edited. See section 3.4.9.
    -
    -  Old versions of htmLawed may be available online. E.g., for version 1.0, check http://www.bioinformatics.org/phplabware/downloads/htmLawed1.zip, for 1.1.1, htmLawed111.zip, and for 1.1.10, htmLawed1110.zip.
    - -
    -

    -4.6  Comparison with HTMLPurifier -

    (to top)
    -
    -  The HTMLPurifier PHP library by Edward Yang is a very good HTML filtering script that uses object oriented PHP code. Compared to htmLawed, it (as of mid-2009):
    -
    -  *  does not support PHP versions older than 5.0 (HTMLPurifier dropped PHP 4 support after version 2)
    -
    -  *  is 15-20 times bigger (scores of files totalling more than 750 kb)
    -
    -  *  consumes 10-15 times more RAM memory (just including the HTMLPurifier files without calling the filter requires a few MBs of memory)
    -
    -  *  is expectedly slower
    -
    -  *  does not allow admins to fully allow all valid HTML (because of incomplete HTML support, it always considers elements like script illegal)
    -
    -  *  lacks many of the extra features of htmLawed (like entity conversions and code compaction/beautification)
    -
    -  *  has poor documentation
    -
    -  However, HTMLPurifier has finer checks for character encodings and attribute values, and can log warnings and errors. Visit the HTMLPurifier website for updated information.
    - -
    -

    -4.7  Use through application plug-ins/modules -

    (to top)
    -
    -  Plug-ins/modules to implement htmLawed in applications such as Drupal and DokuWiki may have been developed. Please check the application websites and the forum on the htmLawed site.
    - -
    -

    -4.8  Use in non-PHP applications -

    (to top)
    -
    -  Non-PHP applications written in Python, Ruby, etc., may be able to use htmLawed through system calls to the PHP engine. Such code may have been documented on the internet. Also check the forum on the htmLawed site.
    - -
    -

    -4.9  Donate -

    (to top)
    -
    -  A donation in any currency and amount to appreciate or support this software can be sent by PayPal to this email address: drpatnaik at yahoo dot com.
    - -
    -

    -4.10  Acknowledgements -

    (to top)
    -
    -  Nicholas Alipaz, Bryan Blakey, Pádraic Brady, Ulf Harnhammer, Gareth Heyes, Klaus Leithoff, Lukasz Pilorz, Shelley Powers, Edward Yang, and many anonymous users.
    -
    -  Thank you!
    - -
    -
    -

    -5  Appendices -

    (to top)
    -

    -5.1  Characters discouraged in XHTML -

    (to top)
    -
    -  Characters represented by the following hexadecimal code-points are not invalid, even though some validators may issue messages stating otherwise.
    -
    7f to 84, 86 to 9f, fdd0 to fddf, 1fffe, 1ffff, 2fffe, 2ffff, 3fffe, 3ffff, 4fffe, 4ffff, 5fffe, 5ffff, 6fffe, 6ffff, 7fffe, 7ffff, 8fffe, 8ffff, 9fffe, 9ffff, afffe, affff, bfffe, bffff, cfffe, cffff, dfffe, dffff, efffe, effff, ffffe, fffff, 10fffe and 10ffff
    - -
    -

    -5.2  Valid attribute-element combinations -

    (to top)
    -
    -  Valid attribute-element combinations as per W3C specs.
    -
    -  *  includes deprecated attributes (marked ^), attributes for the non-standard embed element (marked *), and the proprietary bordercolor (marked ~)
    -  *  only non-frameset, HTML body elements
    -  *  name for a and map, and lang are invalid in XHTML 1.1
    -  *  target is valid for a in XHTML 1.1 and higher
    -  *  xml:space is only for XHTML 1.1
    -
    -  abbr - td, th
    -  accept - form, input
    -  accept-charset - form
    -  accesskey - a, area, button, input, label, legend, textarea
    -  action - form
    -  align - caption^, embed, applet, iframe, img^, input^, object^, legend^, table^, hr^, div^, h1^, h2^, h3^, h4^, h5^, h6^, p^, col, colgroup, tbody, td, tfoot, th, thead, tr
    -  alt - applet, area, img, input
    -  archive - applet, object
    -  axis - td, th
    -  bgcolor - embed, table^, tr^, td^, th^
    -  border - table, img^, object^
    -  bordercolor~ - table, td, tr
    -  cellpadding - table
    -  cellspacing - table
    -  char - col, colgroup, tbody, td, tfoot, th, thead, tr
    -  charoff - col, colgroup, tbody, td, tfoot, th, thead, tr
    -  charset - a, script
    -  checked - input
    -  cite - blockquote, q, del, ins
    -  classid - object
    -  clear - br^
    -  code - applet
    -  codebase - object, applet
    -  codetype - object
    -  color - font
    -  cols - textarea
    -  colspan - td, th
    -  compact - dir, dl^, menu, ol^, ul^
    -  coords - area, a
    -  data - object
    -  datetime - del, ins
    -  declare - object
    -  defer - script
    -  dir - bdo
    -  disabled - button, input, optgroup, option, select, textarea
    -  enctype - form
    -  face - font
    -  flashvars* - embed
    -  for - label
    -  frame - table
    -  frameborder - iframe
    -  headers - td, th
    -  height - embed, iframe, td^, th^, img, object, applet
    -  href - a, area
    -  hreflang - a
    -  hspace - applet, img^, object^
    -  ismap - img, input
    -  label - option, optgroup
    -  language - script^
    -  longdesc - img, iframe
    -  marginheight - iframe
    -  marginwidth - iframe
    -  maxlength - input
    -  method - form
    -  model* - embed
    -  multiple - select
    -  name - button, embed, textarea, applet^, select, form^, iframe^, img^, a^, input, object, map^, param
    -  nohref - area
    -  noshade - hr^
    -  nowrap - td^, th^
    -  object - applet
    -  onblur - a, area, button, input, label, select, textarea
    -  onchange - input, select, textarea
    -  onfocus - a, area, button, input, label, select, textarea
    -  onreset - form
    -  onselect - input, textarea
    -  onsubmit - form
    -  pluginspage* - embed
    -  pluginurl* - embed
    -  prompt - isindex
    -  readonly - textarea, input
    -  rel - a
    -  rev - a
    -  rows - textarea
    -  rowspan - td, th
    -  rules - table
    -  scope - td, th
    -  scrolling - iframe
    -  selected - option
    -  shape - area, a
    -  size - hr^, font, input, select
    -  span - col, colgroup
    -  src - embed, script, input, iframe, img
    -  standby - object
    -  start - ol^
    -  summary - table
    -  tabindex - a, area, button, input, object, select, textarea
    -  target - a^, area, form
    -  type - a, embed, object, param, script, input, li^, ol^, ul^, button
    -  usemap - img, input, object
    -  valign - col, colgroup, tbody, td, tfoot, th, thead, tr
    -  value - input, option, param, button, li^
    -  valuetype - param
    -  vspace - applet, img^, object^
    -  width - embed, hr^, iframe, img, object, table, td^, th^, applet, col, colgroup, pre^
    -  wmode - embed
    -  xml:space - pre, script, style
    -
    -  These are allowed in all but the shown elements:
    -
    -  class - param, script
    -  dir - applet, bdo, br, iframe, param, script
    -  id - script
    -  lang - applet, br, iframe, param, script
    -  onclick - applet, bdo, br, font, iframe, isindex, param, script
    -  ondblclick - applet, bdo, br, font, iframe, isindex, param, script
    -  onkeydown - applet, bdo, br, font, iframe, isindex, param, script
    -  onkeypress - applet, bdo, br, font, iframe, isindex, param, script
    -  onkeyup - applet, bdo, br, font, iframe, isindex, param, script
    -  onmousedown - applet, bdo, br, font, iframe, isindex, param, script
    -  onmousemove - applet, bdo, br, font, iframe, isindex, param, script
    -  onmouseout - applet, bdo, br, font, iframe, isindex, param, script
    -  onmouseover - applet, bdo, br, font, iframe, isindex, param, script
    -  onmouseup - applet, bdo, br, font, iframe, isindex, param, script
    -  style - param, script
    -  title - param, script
    -  xml:lang - applet, br, iframe, param, script
    - -
    -

    -5.3  CSS 2.1 properties accepting URLs -

    (to top)
    -
    -  background
    -  background-image
    -  content
    -  cue-after
    -  cue-before
    -  cursor
    -  list-style
    -  list-style-image
    -  play-during
    - -
    -

    -5.4  Microsoft Windows 1252 character replacements -

    (to top)
    -
    -  Key: d double, l left, q quote, r right, s. single
    -
    -  Code-point (decimal) - hexadecimal value - replacement entity - represented character
    -
    -  127 - 7f - (removed) - (not used)
    -  128 - 80 - &#8364; - euro
    -  129 - 81 - (removed) - (not used)
    -  130 - 82 - &#8218; - baseline s. q
    -  131 - 83 - &#402; - florin
    -  132 - 84 - &#8222; - baseline d q
    -  133 - 85 - &#8230; - ellipsis
    -  134 - 86 - &#8224; - dagger
    -  135 - 87 - &#8225; - d dagger
    -  136 - 88 - &#710; - circumflex accent
    -  137 - 89 - &#8240; - permile
    -  138 - 8a - &#352; - S Hacek
    -  139 - 8b - &#8249; - l s. guillemet
    -  140 - 8c - &#338; - OE ligature
    -  141 - 8d - (removed) - (not used)
    -  142 - 8e - &#381; - Z dieresis
    -  143 - 8f - (removed) - (not used)
    -  144 - 90 - (removed) - (not used)
    -  145 - 91 - &#8216; - l s. q
    -  146 - 92 - &#8217; - r s. q
    -  147 - 93 - &#8220; - l d q
    -  148 - 94 - &#8221; - r d q
    -  149 - 95 - &#8226; - bullet
    -  150 - 96 - &#8211; - en dash
    -  151 - 97 - &#8212; - em dash
    -  152 - 98 - &#732; - tilde accent
    -  153 - 99 - &#8482; - trademark
    -  154 - 9a - &#353; - s Hacek
    -  155 - 9b - &#8250; - r s. guillemet
    -  156 - 9c - &#339; - oe ligature
    -  157 - 9d - (removed) - (not used)
    -  158 - 9e - &#382; - z dieresis
    -  159 - 9f - &#376; - Y dieresis
    - -
    -

    -5.5  URL format -

    (to top)
    -
    -  An absolute URL has a protocol or scheme, a network location or hostname, and optional path, parameters, query and fragment segments. Thus, an absolute URL has this generic structure:
    -
    - -    (scheme) : (//network location) /(path) ;(parameters) ?(query) #(fragment) -
    -
    -  The schemes can only contain letters, digits, +, . and -. Hostname is the portion after the // and up to the first / (if any; else, up to the end) when : is followed by a // (e.g., abc.com in ftp://abc.com/def); otherwise, it consists of everything after the : (e.g., def@abc.com in mailto:def@abc.com).
    -
    Relative URLs do not have explicit schemes and network locations; such values are inherited from a base URL.
    - -
    -

    -5.6  Brief on htmLawed code -

    (to top)
    -
    -  Much of the code's logic and reasoning can be understood from the documentation above.
    -
    -  The output of htmLawed is a text string containing the processed input. There is no custom error tracking.
    -
    Function arguments for htmLawed are:
    -
    -  *  $in - 1st argument; a text string; the input text to be processed. Any extraneous slashes added by PHP when magic quotes are enabled should be removed beforehand using PHP's stripslashes() function.
    -
    -  *  $config - 2nd argument; an associative array; optional (named $C in htmLawed code). The array has keys with names like balance and keep_bad, and the values, which can be boolean, string, or array, depending on the key, are read to accordingly set the configurable parameters (indicated by the keys). All configurable parameters receive some default value if the value to be used is not specified by the user through $config. Finalized $config is thus a filtered and possibly larger array.
    -
    -  *  $spec - 3rd argument; a text string; optional. The string has rules, written in an htmLawed-designated format, specifying element-specific attribute and attribute value restrictions. Function hl_spec() is used to convert the string to an associative-array for internal use. Finalized $spec is thus an array.
    -
    Finalized $config and $spec are made global variables while htmLawed is at work. Values of any pre-existing global variables with same names are noted, and their values are restored after htmLawed finishes processing the input (to capture the finalized values, the show_setting parameter of $config should be used). Depending on $config, another global variable hl_Ids, to track id attribute values for uniqueness, may be set. Unlike the other two variables, this one is not reset (or unset) post-processing.
    -
    -  Except for the main function htmLawed() and the functions kses() and kses_hook(), htmLawed's functions are name-spaced using the hl_ prefix. The functions and their roles are:
    -
    -  *  hl_attrval - checking attribute values against $spec
    -  *  hl_bal - tag balancing
    -  *  hl_cmtcd - handling CDATA sections and HTML comments
    -  *  hl_ent - entity handling
    -  *  hl_prot - checking a URL scheme/protocol
    -  *  hl_regex - checking syntax of a regular expression
    -  *  hl_spec - converting user-supplied $spec value to one used by htmLawed internally
    -  *  hl_tag - handling tags
    -  *  hl_tag2 - transforming tags
    -  *  hl_tidy - compact/beautify HTML
    -  *  hl_version - reporting htmLawed version
    -  *  htmLawed - main function
    -  *  kses - main function of kses
    -  *  kses_hook - hook function of kses
    -
    -  The last two are for compatibility with pre-existing code using the kses script. htmLawed's kses() basically passes on the filtering task to htmLawed() function after deciphering $config and $spec from the argument values supplied to it. kses_hook() is an empty function and is meant for being filled with custom code if the kses script users were using one.
    -
    htmLawed() finalizes $spec (with the help of hl_spec()) and $config, and globalizes them. Finalization of $config involves setting default values if an inappropriate or invalid one is supplied. This includes calling hl_regex() to check well-formedness of regular expression patterns if such expressions are user-supplied through $config. htmLawed() then removes invalid characters like nulls and x01 and appropriately handles entities using hl_ent(). HTML comments and CDATA sections are identified and treated as per $config with the help of hl_cmtcd(). When retained, the < and > characters identifying them, and the <, > and & characters inside them, are replaced with control characters (code-points 1 to 5) till any tag balancing is completed.
    -
    -  After this initial processing htmLawed() identifies tags using regex and processes them with the help of hl_tag() --  a large function that analyzes tag content, filtering it as per HTML standards, $config and $spec. Among other things, hl_tag() transforms deprecated elements using hl_tag2(), removes attributes from closing tags, checks attribute values as per $spec rules using hl_attrval(), and checks URL protocols using hl_prot(). htmLawed() performs tag balancing and nesting checks with a call to hl_bal(), and optionally compacts/beautifies the output with proper white-spacing with a call to hl_tidy(). The latter temporarily replaces white-space, and <, > and & characters inside pre, script and textarea elements, and HTML comments and CDATA sections with control characters (code-points 1 to 5, and 7).
    -
    -  htmLawed permits the use of custom code or hook functions at two stages. The first, called inside htmLawed(), allows the input text as well as the finalized $config and $spec values to be altered right after the initial processing (see section 3.7). The second is called by hl_tag() once the tag content is finalized (see section 3.4.9).
    -
    -  Being dictated by the external and stable HTML standard, htmLawed's objective is very clear-cut and less concerned with tweakability. The code is only minimally annotated with comments -- it is not meant to instruct; PHP developers familiar with the HTML specs will see the logic, and others can always refer to the htmLawed documentation. The compact structuring of the statements is meant to aid in quickly grasping the logic, at least when viewed with code syntax highlighted. -
    -
    -
    -


    HTM version of htmLawed_README.txt generated on 06 Jun, 2012 using rTxt2htm from PHP Labware -
    -
    - + + + + + + + + +htmLawed documentation | htmLawed PHP software is a free, open-source, customizable HTML input purifier and filter + + +
    +

    htmLawed documentation

    + +
    1  About htmLawed
    1.1  Example uses
    1.2  Features
    1.3  History
    1.4  License & copyright
    1.5  Terms used here
    1.6  Availability
    +2  Usage
    2.1  Simple
    2.2  Configuring htmLawed using the $config parameter
    2.3  Extra HTML specifications using the $spec parameter
    2.4  Performance time & memory usage
    2.5  Some security risks to keep in mind
    2.6  Use without modifying old kses() code
    2.7  Tolerance for ill-written HTML
    2.8  Limitations & work-arounds
    2.9  Examples of usage
    +3  Details
    3.1  Invalid/dangerous characters
    3.2  Character references/entities
    3.3  HTML elements
    +    3.3.1  HTML comments and CDATA sections
    +    3.3.2  Tag-transformation for better XHTML-Strict
    +    3.3.3  Tag balancing and proper nesting
    +    3.3.4  Elements requiring child elements
    +    3.3.5  Beautify or compact HTML
    3.4  Attributes
    +    3.4.1  Auto-addition of XHTML-required attributes
    +    3.4.2  Duplicate/invalid id values
    +    3.4.3  URL schemes (protocols) and scripts in attribute values
    +    3.4.4  Absolute & relative URLs
    +    3.4.5  Lower-cased, standard attribute values
    +    3.4.6  Transformation of deprecated attributes
    +    3.4.7  Anti-spam & href
    +    3.4.8  Inline style properties
    +    3.4.9  Hook function for tag content
    3.5  Simple configuration directive for most valid XHTML
    3.6  Simple configuration directive for most safe HTML
    3.7  Using a hook function
    3.8  Obtaining finalized parameter values
    3.9  Retaining non-HTML tags in input with mixed markup
    +4  Other
    4.1  Support
    4.2  Known issues
    4.3  Change-log
    4.4  Testing
    4.5  Upgrade, & old versions
    4.6  Comparison with HTMLPurifier
    4.7  Use through application plug-ins/modules
    4.8  Use in non-PHP applications
    4.9  Donate
    4.10  Acknowledgements
    +5  Appendices
    5.1  Characters discouraged in HTML
    5.2  Valid attribute-element combinations
    5.3  CSS 2.1 properties accepting URLs
    5.4  Microsoft Windows 1252 character replacements
    5.5  URL format
    5.6  Brief on htmLawed code
    + +
    +
    +
    htmLawed_README.txt, 29 August 2013
    +htmLawed 1.1.16, 29 August 2013
    +Copyright Santosh Patnaik
    +Dual licensed with LGPL 3 and GPL 2+
    +A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed 
    +
    + +

    +1  About htmLawed +

    (to top)
    +
    +  htmLawed is a PHP script to process text with HTML markup to make it more compliant with HTML standards and administrative policies. It works by making HTML well-formed with balanced and properly nested tags, neutralizing code that may be used for cross-site scripting (XSS) attacks, allowing only specified HTML tags and attributes, and so on. Such lawing in of HTML in text used in (X)HTML or XML documents ensures that it is in accordance with the aesthetics, safety and usability requirements set by administrators.
    +
    +  htmLawed is highly customizable, and fast with low memory usage. Its free and open-source code is in one small file, does not require extensions or libraries, and works in older versions of PHP as well. It is a good alternative to the HTML Tidy application.
    + +

    +1.1  Example uses +

    (to top)
    +
    +  *  Filtering of text submitted as comments on blogs to allow only certain HTML elements
    +
    +  *  Making RSS/Atom newsfeed item-content standard-compliant: often one uses an excerpt from an HTML document for the content, and with unbalanced tags, non-numerical entities, etc., such excerpts may not be XML-compliant
    +
    +  *  Text processing for stricter XML standard-compliance: e.g., having a lowercased x in hexadecimal numeric entities becomes necessary if an XHTML document with MathML content needs to be served as application/xml
    +
    +  *  Scraping text or data from web-pages
    +
    +  *  Pretty-printing HTML code
    + +
    +

    +1.2  Features +

    (to top)
    +
    +  Key: * security feature, ^ standard compliance, ~ requires setting right options, ` different from Kses
    +
    +  *  make input more secure and standard-compliant
    +  *  use for HTML 4, XHTML 1.0 or 1.1, or even generic XML documents  ^~`
    +
    +  *  beautify or compact HTML  ^~`
    +
    +  *  can restrict elements  ^~`
    +  *  ensures proper closure of empty elements like img  ^`
    +  *  transform deprecated elements like u  ^~`
    +  *  HTML comments and CDATA sections can be permitted  ^~`
    +  *  elements like script, object and form can be permitted  ~
    +
    +  *  restrict attributes, including element-specifically  ^~`
    +  *  remove invalid attributes  ^`
    +  *  element and attribute names are lower-cased  ^
    +  *  provide required attributes, like alt for image  ^`
    +  *  transforms deprecated attributes  ^~`
    +  *  attributes declared only once  ^`
    +
    +  *  restrict attribute values, including element-specifically  ^~`
    +  *  a value is declared for empty (minimized) attributes like checked  ^
    +  *  check for potentially dangerous attribute values  *~
    +  *  ensure unique id attribute values  ^~`
    +  *  double-quote attribute values  ^
    +  *  lower-case standard attribute values like password  ^`
    +  *  permit custom, non-standard attributes as well as custom rules for standard attributes  ~`
    +
    +  *  attribute-specific URL protocol/scheme restriction  *~`
    +  *  disable dynamic expressions in style values  *~`
    +
    +  *  neutralize invalid named character entities  ^`
    +  *  convert hexadecimal numeric entities to decimal ones, or vice versa  ^~`
    +  *  convert named entities to numeric ones for generic XML use  ^~`
    +
    +  *  remove null characters  *
    +  *  neutralize potentially dangerous proprietary Netscape Javascript entities  *
    +  *  replace potentially dangerous soft-hyphen character in URL-accepting attribute values with spaces  *
    +
    +  *  remove common invalid characters not allowed in HTML or XML  ^`
    +  *  replace characters from Microsoft applications like Word that are discouraged in HTML or XML  ^~`
    +  *  neutralize entities for characters invalid or discouraged in HTML or XML  ^`
    +  *  appropriately neutralize <, &, ", and > characters  ^*`
    +
    +  *  understands improperly spaced tag content (like, spread over more than a line) and properly spaces them  `
    +  *  attempts to balance tags for well-formedness  ^~`
    +  *  understands when omittable closing tags like </p> (allowed in HTML 4, transitional, e.g.) are missing  ^~`
    +  *  attempts to permit only validly nested tags  ^~`
    +  *  option to remove or neutralize bad content ^~`
    +  *  attempts to rectify common errors of plain-text misplacement (e.g., directly inside blockquote) ^~`
    +
    +  *  fast, non-OOP code of ~45 kb incurring peak basal memory usage of ~0.5 MB
    +  *  compatible with pre-existing code using Kses (the filter used by WordPress)
    +
    +  *  optional anti-spam measures such as addition of rel="nofollow" and link-disabling  ~`
    +  *  optionally makes relative URLs absolute, and vice versa  ~`
    +
    +  *  optionally mark & to identify the entities for &, < and > introduced by htmLawed  ~`
    +
    +  *  allows deployment of powerful hook functions to inject HTML, consolidate style attributes to class, finely check attribute values, etc.  ~`
    +
    +  *  independent of character encoding of input and does not affect it
    +
    +  *  tolerance for ill-written HTML to a certain degree
    + +
    +

    +1.3  History +

    (to top)
    +
    +  htmLawed was created in 2007 for use with LabWiki, a wiki software developed at PHP Labware, because suitable software could not be found. Existing PHP software, like Kses and HTMLPurifier, was deemed inadequate, slow, resource-intensive, or dependent on an extension or external application like HTML Tidy. The core logic of htmLawed, that of identifying HTML elements and attributes, was based on the Kses (version 0.2.2) HTML filter software of Ulf Harnhammar (it can still be used with code that uses Kses; see section 2.6).
    +
    +  See section 4.3 for a detailed log of changes in htmLawed over the years, and section 4.10 for acknowledgements.
    + +
    +

    +1.4  License & copyright +

    (to top)
    +
    +  htmLawed is free and open-source software copyrighted by Santosh Patnaik, MD, PhD, and dual-licensed under the LGPL version 3 and the GPL version 2 (or later).
    + +
    +

    +1.5  Terms used here +

    (to top)
    +
    +  In this document, only HTML body-level elements are considered. htmLawed does not have support for head-level elements, body, and the frame-level elements, frameset, frame and noframes, and these elements are ignored here.
    +
    +  *  administrator - or admin; person setting up the code that utilizes htmLawed; also, user
    +  *  attributes - name-value pairs like href="http://x.com" in opening tags
    +  *  author - see writer
    +  *  character - atomic unit of text; internally represented by a numeric code-point as specified by the encoding or charset in use
    +  *  entity - markup like &gt; and &#160; used to refer to a character
    +  *  element - HTML element like a and img
    +  *  element content -  content between the opening and closing tags of an element, like click of the <a href="x">click</a> element
    +  *  HTML - implies XHTML unless specified otherwise
    +  *  HTML body - Complete HTML documents typically have a head and a body container. Information in head specifies title of the document, etc., whereas that in the body informs what is to be displayed on a web-page; it is only the elements for body, except frame, frameset and noframes, that htmLawed is concerned with
    +  *  input - text given to htmLawed to process
    +  *  processing - involves filtering, correction, etc., of input
    +  *  safe - absence or reduction of certain characters and HTML elements and attributes in HTML of text that can otherwise potentially, and circumstantially, expose text readers to security vulnerabilities like cross-site scripting attacks (XSS)
    +  *  scheme - a URL protocol like http and ftp
    +  *  specifications - standard specifications, for HTML4, HTML5, Ruby, etc.
    +  *  style property - terms like border and height for which declarations are made in values for the style attribute of elements
    +  *  tag - markers like <a href="x"> and </a> delineating element content; the opening tag can contain attributes
    +  *  tag content - consists of tag markers < and >, element names like div, and possibly attributes
    +  *  user - administrator
    +  *  writer - end-user like a blog commenter providing the input that is to be processed; also, author
    + +
    +

    +1.6  Availability +

    (to top)
    +
    +  htmLawed can be downloaded for free at its website. Besides the htmLawed.php file, the download has the htmLawed documentation (this document) in plain text and HTML formats, a script for testing, and a text file for test-cases. htmLawed is also available as a PHP class (OOP code) on its website.
    + +
    +
    +

    +2  Usage +

    (to top)
    +
    +  htmLawed works in PHP version 4.4 or higher. Either include() the htmLawed.php file, or copy-paste the entire code. To use with PHP 4.3, have the following code included:
    +
    + +    if(!function_exists('ctype_digit')){ +
    + +     function ctype_digit($var){ +
    + +      return ((int) $var == $var); +
    + +     } +
    + +    } +
    + +

    +2.1  Simple +

    (to top)
    +
    +  The input text to be processed, $text, is passed as an argument of type string; htmLawed() returns the processed string:
    +
    + +    $processed = htmLawed($text); +
    +
    +  With the htmLawed class (section 1.6), usage is:
    +
    + +    $processed = htmLawed::hl($text); +
    +
    Notes: (1) If input is from a $_GET or $_POST value, and magic quotes are enabled on the PHP setup, run stripslashes() on the input before passing to htmLawed. (2) htmLawed does not have support for head-level elements, body, and the frame-level elements, frameset, frame and noframes.
    +
    +  By default, htmLawed will process the text allowing all valid HTML elements/tags, secure URL scheme/CSS style properties, etc. It will allow CDATA sections and HTML comments, balance tags, and ensure proper nesting of elements. Such actions can be configured using two other optional arguments -- $config and $spec:
    +
    + +    $processed = htmLawed($text, $config, $spec); +
    +
    +  The $config and $spec arguments are detailed below. Some examples are shown in section 2.9. For maximum protection against XSS and other scripting attacks (e.g., by disallowing Javascript code), consider using the safe parameter; see section 3.6.
    + +
    +

    +2.2  Configuring htmLawed using the $config parameter +

    (to top)
    +
    $config instructs htmLawed on how to tackle certain tasks. When $config is not specified, or not set as an array (e.g., $config = 1), htmLawed will take default actions. One or many of the task-action or value-specification pairs can be specified in $config as array key-value pairs. If a parameter is not specified, htmLawed will use the default value/action indicated further below.
    +
    + +    $config = array('comment'=>0, 'cdata'=>1); +
    + +    $processed = htmLawed($text, $config); +
    +
    +  Or,
    +
    + +    $processed = htmLawed($text, array('comment'=>0, 'cdata'=>1)); +
    +
    +  Below are the possible value-specification combinations. In PHP code, values that are integers should not be quoted and should be used as numeric types (unless meant as string/text).
    +
    +  Key: * default, ^ different default when htmLawed is used in the Kses-compatible mode (see section 2.6), ~ different default when valid_xhtml is set to 1 (see section 3.5), " different default when safe is set to 1 (see section 3.6)
    +
    abs_url
    +  Make URLs absolute or relative; $config["base_url"] needs to be set; see section 3.4.4
    +
    -1 - make relative
    0 - no action  *
    1 - make absolute
    +
    and_mark
    +  Mark & characters in the original input; see section 3.2
    +
    anti_link_spam
    +  Anti-link-spam measure; see section 3.4.7
    +
    0 - no measure taken  *
    array("regex1", "regex2") - will ensure a rel attribute with nofollow in its value in case the href attribute value matches the regular expression pattern regex1, and/or will remove href if its value matches the regular expression pattern regex2. E.g., array("/./", "/://\W*(?!(abc\.com|xyz\.org))/"); see section 3.4.7 for more.
    +
    anti_mail_spam
    +  Anti-mail-spam measure; see section 3.4.7
    +
    0 - no measure taken  *
    word - @ in mail address in href attribute value is replaced with specified word
    +
    balance
    +  Balance tags for well-formedness and proper nesting; see section 3.3.3
    +
    0 - no
    1 - yes  *
    +
    base_url
    +  Base URL value that needs to be set if $config["abs_url"] is not 0; see section 3.4.4
    +
    cdata
    +  Handling of CDATA sections; see section 3.3.1
    +
    0 - don't consider CDATA sections as markup and proceed as if plain text  ^"
    1 - remove
    2 - allow, but neutralize any <, >, and & inside by converting them to named entities
    3 - allow  *
    +
    clean_ms_char
    +  Replace discouraged characters introduced by Microsoft Word, etc.; see section 3.1
    +
    0 - no  *
    1 - yes
    2 - yes, but replace special single & double quotes with ordinary ones
    +
    comment
    +  Handling of HTML comments; see section 3.3.1
    +
    0 - don't consider comments as markup and proceed as if plain text  ^"
    1 - remove
    2 - allow, but neutralize any <, >, and & inside by converting to named entities
    3 - allow  *
    +
    css_expression
    +  Allow dynamic CSS expression by not removing the expression from CSS property values in style attributes; see section 3.4.8
    +
    0 - remove  *
    1 - allow
    +
    deny_attribute
    +  Denied HTML attributes; see section 3.4
    +
    0 - none  *
    string - dictated by values in string
    on* (like onfocus) attributes not allowed - "
    +
    direct_nest_list
    +  Allow direct nesting of a list within another without requiring it to be a list item; see section 3.3.4
    +
    0 - no  *
    1 - yes
    +
    elements
    +  Allowed HTML elements; see section 3.3
    +
    * -center -dir -font -isindex -menu -s -strike -u -  ~
    applet, embed, iframe, object, script not allowed - "
    +
    hexdec_entity
    +  Allow hexadecimal numeric entities and do not convert to the more widely accepted decimal ones, or convert decimal to hexadecimal ones; see section 3.2
    +
    0 - no
    1 - yes  *
    2 - convert decimal to hexadecimal ones
    +
    hook
    +  Name of an optional hook function to alter the input string, $config or $spec before htmLawed starts its main work; see section 3.7
    +
    0 - no hook function  *
    name - name is name of the hook function (kses_hook  ^)
    +
    hook_tag
    +  Name of an optional hook function to alter tag content finalized by htmLawed; see section 3.4.9
    +
    0 - no hook function  *
    name - name is name of the hook function
    +
    keep_bad
    +  Neutralize bad tags by converting < and > to entities, or remove them; see section 3.3.3
    +
    0 - remove  ^
    1 - neutralize both tags and element content
    2 - remove tags but neutralize element content
    3 and 4 - like 1 and 2 but remove if text (pcdata) is invalid in parent element
    5 and 6 * -  like 3 and 4 but line-breaks, tabs and spaces are left
    +
    lc_std_val
    +  For XHTML compliance, predefined, standard attribute values, like get for the method attribute of form, must be lowercased; see section 3.4.5
    +
    0 - no
    1 - yes  *
    +
    make_tag_strict
    +  Transform/remove these non-strict XHTML elements, even if they are allowed by the admin: applet center dir embed font isindex menu s strike u; see section 3.3.2
    +
    0 - no  ^
    1 - yes, but leave applet, embed and isindex elements that currently can't be transformed  *
    2 - yes, removing applet, embed and isindex elements and their contents (nested elements remain)  ~
    +
    named_entity
    +  Allow non-universal named HTML entities, or convert to numeric ones; see section 3.2
    +
    0 - convert
    1 - allow  *
    +
    no_deprecated_attr
    +  Allow deprecated attributes or transform them; see section 3.4.6
    +
    0 - allow  ^
    1 - transform, but name attributes for a and map are retained  *
    2 - transform
    +
    parent
    +  Name of the parent element, possibly imagined, that will hold the input; see section 3.3
    +
    safe
    +  Magic parameter to make input the most secure against XSS without needing to specify other relevant $config parameters; see section 3.6
    +
    0 - no  *
    1 - will auto-adjust other relevant $config parameters (indicated by " in this list)
    +
    schemes
    +  Array of attribute-specific, comma-separated, lower-cased list of schemes (protocols) allowed in attributes accepting URLs (or ! to deny any URL); * covers all unspecified attributes; see section 3.4.3
    +
    href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https  *
    *: ftp, gopher, http, https, mailto, news, nntp, telnet  ^
    href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; style: !; *:file, http, https  "
    +
    show_setting
    +  Name of a PHP variable to assign the finalized $config and $spec values; see section 3.8
    +
    style_pass
    +  Do not look at style attribute values, letting them through without any alteration
    +
    0 - no *
    1 - htmLawed will let through any style value; see section 3.4.8
    +
    tidy
    +  Beautify or compact HTML code; see section 3.3.5
    +
    -1 - compact
    0 - no  *
    1 or string - beautify (custom format specified by string)
    +
    unique_ids
    id attribute value checks; see section 3.4.2
    +
    0 - no  ^
    1 - remove duplicate and/or invalid ones  *
    word - remove invalid ones and replace duplicate ones with new and unique ones based on the word; the admin-specified word, like my_, should begin with a letter (a-z) and can contain letters, digits, ., _, -, and :.
    +
    valid_xhtml
    +  Magic parameter to make input the most valid XHTML without needing to specify other relevant $config parameters; see section 3.5
    +
    0 - no  *
    1 - will auto-adjust other relevant $config parameters (indicated by ~ in this list)
    +
    xml:lang
    +  Auto-adding xml:lang attribute; see section 3.4.1
    +
    0 - no  *
    1 - add if lang attribute is present
    2 - add if lang attribute is present, and remove lang  ~
    + +
    +

    +2.3  Extra HTML specifications using the $spec parameter +

    (to top)
    +
    +  The $spec argument of htmLawed can be used to disallow an otherwise legal attribute for an element, or to restrict the attribute's values. This can also be helpful as a security measure (e.g., in certain versions of browsers, certain values can cause buffer overflows and denial of service attacks), or in enforcing admin policies. $spec is specified as a string of text containing one or more rules, with multiple rules separated from each other by a semi-colon (;). E.g.,
    +
    + +    $spec = 'i=-*; td, tr=style, id, -*; a=id(match="/[a-z][a-z\d.:\-`"]*/i"/minval=2), href(maxlen=100/minlen=34); img=-width,-alt'; +
    + +    $processed = htmLawed($text, $config, $spec); +
    +
    +  Or,
    +
    + +    $processed = htmLawed($text, $config, 'i=-*; td, tr=style, id, -*; a=id(match="/[a-z][a-z\d.:\-`"]*/i"/minval=2), href(maxlen=100/minlen=34); img=-width,-alt'); +
    +
    +  A rule begins with an HTML element name(s) (rule-element), for which the rule applies, followed by an equal (=) sign. A rule-element may represent multiple elements if comma (,)-separated element names are used. E.g., th,td,tr=.
    +
    +  Rest of the rule consists of comma-separated HTML attribute names. A minus (-) character before an attribute means that the attribute is not permitted inside the rule-element. E.g., -width. To deny all attributes, -* can be used.
    +
    +  Following shows examples of rule excerpts with rule-element a and the attributes that are being permitted:
    +
    +  *  a= - all
    +  *  a=id - all
    +  *  a=href, title, -id, -onclick - all except id and onclick
    +  *  a=*, id, -id - all except id
    +  *  a=-* - none
    +  *  a=-*, href, title - none except href and title
    +  *  a=-*, -id, href, title - none except href and title
    +
    +  Rules regarding attribute values are optionally specified inside round brackets after attribute names in slash ('/')-separated parameter = value pairs. E.g., title(maxlen=30/minlen=5). None or one or more of the following parameters may be specified:
    +
    +  *  oneof - one or more choices separated by | that the value should match; if only one choice is provided, then the value must match that choice
    +
    +  *  noneof - one or more choices separated by | that the value should not match
    +
    +  *  maxlen and minlen - upper and lower limits for the number of characters in the attribute value; specified in numbers
    +
    +  *  maxval and minval - upper and lower limits for the numerical value specified in the attribute value; specified in numbers
    +
    +  *  match and nomatch - pattern that the attribute value should or should not match; specified as PHP/PCRE-compatible regular expressions with delimiters and possibly modifiers
    +
    +  *  default - a value to force on the attribute if the value provided by the writer does not fit any of the specified parameters
    +
    +  If default is not set and the attribute value does not satisfy any of the specified parameters, then the attribute is removed. The default value can also be used to force all attribute declarations to take the same value (by getting the values declared illegal by setting, e.g., maxlen to -1).
    +
    +  Examples with input <input title="WIDTH" value="10em" /><input title="length" value="5" /> are shown below.
    +
    Rule: input=title(maxlen=60/minlen=6), value
    Output: <input value="10em" /><input title="length" value="5" />
    +
    Rule: input=title(), value(maxval=8/default=6)
    Output: <input title="WIDTH" value="6" /><input title="length" value="5" />
    +
    Rule: input=title(nomatch=%w.d%i), value(match=%em%/default=6em)
    Output: <input value="10em" /><input title="length" value="6em" />
    +
    Rule: input=title(oneof=height|depth/default=depth), value(noneof=5|6)
    Output: <input title="depth" value="10em" /><input title="depth" />
    +
    Special characters: The characters ;, ,, /, (, ), |, ~ and space have special meanings in the rules. Words in the rules that use such characters, or the characters themselves, should be escaped by enclosing in pairs of double-quotes ("). A back-tick (`) can be used to escape a literal ". An example rule illustrating this is input=value(maxlen=30/match="/^\w/"/default="your `"ID`"").
    +
    Note: To deny an attribute for all elements for which it is legal, $config["deny_attribute"] (see section 3.4) can be used instead of $spec. Also, attributes can be allowed element-specifically through $spec while being denied globally through $config["deny_attribute"]. The hook_tag parameter (section 3.4.9) can also be possibly used to implement a functionality like that achieved using $spec functionality.
    +
    $spec can also be used to permit custom, non-standard attributes as well as custom rules for standard attributes. Thus, the following value of $spec will permit the custom uses of the standard rel attribute in input (not permitted as per standards) and of a non-standard attribute, vFlag, in img.
    +
    + +    $spec = 'img=vFlag; input=rel'; +
    +
    +  The attribute names can contain alphabets, colons (:) and hyphens (-), but they must start with an alphabet.
    + +
    +

    +2.4  Performance time & memory usage +

    (to top)
    +
    +  The time and memory consumed during text processing by htmLawed depends on its configuration, the size of the input, and the amount, nestedness and well-formedness of the HTML markup within the input. In particular, tag balancing and beautification each can increase the processing time by about a quarter.
    +
    +  The htmLawed demo can be used to evaluate the performance and effects of different types of input and $config.
    + +
    +

    +2.5  Some security risks to keep in mind +

    (to top)
    +
    +  When setting the parameters/arguments (like those to allow certain HTML elements) for use with htmLawed, one should bear in mind that the setting may let through potentially dangerous HTML code which is meant to steal user-data, deface a website, render a page non-functional, etc. Unless end-users, either people or software, supplying the content are completely trusted, security issues arising from the degree of HTML usage permitted through htmLawed's setting should be considered. For example, the following increase security risks:
    +
    +  *  Allowing script, applet, embed, iframe or object elements, or certain of their attributes like allowscriptaccess
    +
    +  *  Allowing HTML comments (some Internet Explorer versions are vulnerable with, e.g., <!--[if gte IE 4]><script>alert("xss");</script><![endif]-->)
    +
    +  *  Allowing dynamic CSS expressions (some Internet Explorer versions are vulnerable)
    +
    +  *  Allowing the style attribute
    +
    +  To remove insecure HTML, code-developers using htmLawed must set $config appropriately. E.g., $config["elements"] = "* -script" to deny the script element (section 3.3), $config["safe"] = 1 to auto-configure certain htmLawed parameters for maximizing security (section 3.6), etc.
    +
    +  Permitting the *style* attribute brings in risks of click-jacking, phishing, web-page overlays, etc., even when the safe parameter is enabled (see section 3.6). Except for URLs and a few other things like CSS dynamic expressions, htmLawed currently does not check every CSS style property. It does provide ways for the code-developer implementing htmLawed to do such checks through htmLawed's $spec argument, and through the hook_tag parameter (see section 3.4.8 for more). Disallowing style completely and relying on CSS classes and stylesheet files is recommended.
    +
    +  htmLawed does not check or correct the character encoding of the input it receives. In conjunction with permissive circumstances, such as when the character encoding is left undefined through HTTP headers or HTML meta tags, this can allow for an exploit (like Google's UTF-7/XSS vulnerability of the past).
    + +
    +

    +2.6  Use without modifying old kses() code +

    (to top)
    +
    +  The Kses PHP script is used by many applications (like WordPress). It is possible to have such applications use htmLawed instead, since it is compatible with code that calls the kses() function declared in the Kses file (usually named kses.php). E.g., application code like this will continue to work after replacing Kses with htmLawed:
    +
    + +    $comment_filtered = kses($comment_input, array('a'=>array(), 'b'=>array(), 'i'=>array())); +
    +
    +  For some of the $config parameters, htmLawed will use values other than the default ones. These are indicated by ^ in section 2.2. To force htmLawed to use other values, function kses() in the htmLawed code should be edited -- a few configurable parameters/variables need to be changed.
    +
    +  If the application uses a Kses file that has the kses() function declared, then, to have the application use htmLawed instead of Kses, simply rename htmLawed.php (to kses.php, e.g.) and replace the Kses file (or just replace the code in the Kses file with the htmLawed code). If the kses() function in the Kses file had been renamed by the application developer (e.g., in WordPress, it is named wp_kses()), then appropriately rename the kses() function in the htmLawed code.
    +
    +  If the Kses file used by the application has been highly altered by the application developers, then one may need a different approach. E.g., with WordPress, it is best to copy the htmLawed code to wp-includes/kses.php, rename the newly added function kses() to wp_kses(), and delete the code for the original wp_kses() function.
    +
    +  If the Kses code has a non-empty hook function (e.g., wp_kses_hook() in case of WordPress), then the code for htmLawed's kses_hook() function should be appropriately edited. However, the requirement of the hook function should be re-evaluated considering that htmLawed has extra capabilities. With WordPress, the hook function is an essential one. The following code is suggested for the htmLawed kses_hook() in case of WordPress:
    +
    + +    function kses_hook($string, &$cf, &$spec){ +
    + +    // kses compatibility +
    + +    $allowed_html = $spec; +
    + +    $allowed_protocols = array(); +
    + +    foreach($cf['schemes'] as $v){ +
    + +     foreach($v as $k2=>$v2){ +
    + +      if(!in_array($k2, $allowed_protocols)){ +
    + +       $allowed_protocols[] = $k2; +
    + +      } +
    + +     } +
    + +    } +
    + +    return wp_kses_hook($string, $allowed_html, $allowed_protocols); +
    + +    // eof +
    + +    } +
    + +
    +

    +2.7  Tolerance for ill-written HTML +

    (to top)
    +
    +  htmLawed can work with ill-written HTML code in the input. However, HTML that is too ill-written may not be read as HTML, and may therefore get identified as mere plain text. Following statements indicate the degree of looseness that htmLawed can work with, and can be provided in instructions to writers:
    +
    +  *  Tags must be flanked by < and > with no > inside -- any needed > should be put in as &gt;. It is possible for tag content (element name and attributes) to be spread over many lines instead of being on one. A space may be present between the tag content and >, like <div > and <img / >, but not after the <.
    +
    +  *  Element and attribute names need not be lower-cased.
    +
    +  *  Attribute string of elements may be liberally spaced with tabs, line-breaks, etc.
    +
    +  *  Attribute values may be single- and not double-quoted.
    +
    +  *  Left-padding of numeric entities (like &#0160; and &#x07ff;) with 0 is okay as long as the number of characters between the & and the ; does not exceed 8. All entities must end with ; though.
    +
    +  *  Named character entities must be properly cased. Thus, &Lt; or &TILDE; will not be recognized as entities and will be neutralized.
    +
    +  *  HTML comments should not be inside element tags (they can be between tags), and should begin with <!-- and end with -->. Characters like <, >, and & may be allowed inside depending on $config, but any --> inside should be put in as --&gt;. Any -- inside will be automatically converted to -, and a space will be added before the comment delimiter -->.
    +
    +  *  CDATA sections should not be inside element tags, and can be in element content only if plain text is allowed for that element. They should begin with <![CDATA[ and end with ]]>. Characters like <, >, and & may be allowed inside depending on $config, but any ]]> inside should be put in as ]]&gt;.
    +
    +  *  For attribute values, character entities &lt;, &gt; and &amp; should be used instead of characters < and >, and & (when & is not part of a character entity). This applies even for Javascript code in values of attributes like onclick.
    +
    +  *  Characters <, >, & and " that are part of actual Javascript, etc., code in script elements should be used as such and not be put in as entities like &gt;. Otherwise, though the HTML will be valid, the code may fail to work. Further, if such characters have to be used, then they should be put inside CDATA sections.
    +
    +  *  Simple instructions like "an opening tag cannot be present between two closing tags" and "nested elements should be closed in the reverse order of how they were opened" can help authors write balanced HTML. If tags are imbalanced, htmLawed will try to balance them, but in the process, depending on $config["keep_bad"], some code/text may be lost.
    +
    +  *  Input authors should be notified of admin-specified allowed elements, attributes, configuration values (like conversion of named entities to numeric ones), etc.
    +
    +  *  With $config["unique_ids"] not 0 and the id attribute being permitted, writers should carefully avoid using duplicate or invalid id values as even though htmLawed will correct/remove the values, the final output may not be the one desired. E.g., <a id="home"></a><input id="home" /><label for="home"></label> is processed into
    +<a id="home"></a><input id="prefix_home" /><label for="home"></label>.
    +
    +  *  Even if intended HTML is lost from an ill-written input, the processed output will be more secure and standard-compliant.
    +
    +  *  For URLs, unless $config["schemes"] is appropriately set, writers should avoid using escape characters or entities in schemes. E.g., htt&#112; (which many browsers will read as the harmless http) may be considered bad by htmLawed.
    +
    +  *  htmLawed will attempt to put plain text present directly inside blockquote, form, map and noscript elements (illegal as per the specifications) inside auto-generated div elements.
    + +
    +

    +2.8  Limitations & work-arounds +

    (to top)
    +
    +  htmLawed's main objective is to make the input text more standard-compliant, secure for readers, and free of HTML elements and attributes considered undesirable by the administrator. Some of its current limitations, regardless of this objective, are noted below along with work-arounds.
    +
    +  It should be borne in mind that no browser application is 100% standard-compliant, and that some of the standard specifications (like asking for normalization of white-spacing within textarea elements) are clearly wrong. Regarding security, note that unsafe HTML code is not legally invalid per se.
    +
    +  *  htmLawed is meant for input that goes into the body of HTML documents. HTML's head-level elements are not supported, nor are the frameset elements frameset, frame and noframes. Content of the latter elements can, however, be individually filtered through htmLawed.
    +
    +  *  It cannot transform the non-standard embed elements to the standard-compliant object elements. Yet, it can allow embed elements if permitted (embed is widely used and supported). Admins can certainly use the hook_tag parameter (section 3.4.9) to deploy a custom embed-to-object converter function.
    +
    +  *  The only non-standard element that may be permitted is embed; others like noembed and nobr cannot be permitted without modifying the htmLawed code.
    +
    +  *  It cannot handle input that has non-HTML code like SVG and MathML. One way around is to break the input into pieces and pass only those without non-HTML code to htmLawed. Another is described in section 3.9. A third way may be to somehow take advantage of the $config["and_mark"] parameter (see section 3.2).
    +
    +  *  By default, htmLawed won't check many attribute values for standard compliance. E.g., width="20m" with the dimension in non-standard m is let through. Implementing universal and strict attribute value checks can make htmLawed slow and resource-intensive. Admins should look at the hook_tag parameter (section 3.4.9) or $spec to enforce finer checks.
    +
    +  *  The attributes, deprecated (which can be transformed too) or not, that it supports are largely those that are in the specifications. Only a few of the proprietary attributes are supported.
    +
    +  *  Except for contained URLs and dynamic expressions (also optional), htmLawed does not check CSS style property values. Admins should look at using the hook_tag parameter (section 3.4.9) or $spec for finer checks. Perhaps the best option is to disallow style but allow class attributes with the right oneof or match values for class, and have the various class style properties in .css CSS stylesheet files.
    +
    +  *  htmLawed does not parse emoticons, decode BBcode, or wikify, auto-converting text to proper HTML. Similarly, it won't convert line-breaks to br elements. Such functions are beyond its purview. Admins should use other code to pre- or post-process the input for such purposes.
    +
    +  *  htmLawed cannot be used to have links force-opened in new windows (by auto-adding appropriate target and onclick attributes to a). Admins should look at Javascript-based DOM-modifying solutions for this. Admins may also be able to use a custom hook function to enforce such checks (hook_tag parameter; see section 3.4.9).
    +
    +  *  Nesting-based checks are not possible. E.g., one cannot disallow p elements specifically inside td while permitting it elsewhere. Admins may be able to use a custom hook function to enforce such checks (hook_tag parameter; see section 3.4.9).
    +
    +  *  Except for optionally converting absolute or relative URLs to the other type, htmLawed will not alter URLs (e.g., to change the value of query strings or to convert http to https). Having absolute URLs may be a standard-requirement, e.g., when HTML is embedded in email messages, whereas altering URLs for other purposes is beyond htmLawed's goals. Admins may be able to use a custom hook function to enforce such checks (hook_tag parameter; see section 3.4.9).
    +
    +  *  Pairs of opening and closing tags that do not enclose any content (like <em></em>) are not removed. This may be against the standard specifications for certain elements (e.g., table). However, presence of such standard-incompliant code will not break the display or layout of content. Admins can also use simple regex-based code to filter out such code.
    +
    +  *  htmLawed does not check for certain element orderings described in the standard specifications (e.g., in a table, tbody is allowed before tfoot). Admins may be able to use a custom hook function to enforce such checks (hook_tag parameter; see section 3.4.9).
    +
    +  *  htmLawed does not check the number of nested elements. E.g., it will allow two caption elements in a table element, illegal as per the specifications. Admins may be able to use a custom hook function to enforce such checks (hook_tag parameter; see section 3.4.9).
    +
    +  *  htmLawed might convert certain entities to actual characters and remove backslashes and CSS comment-markers (/*) in style attribute values in order to detect malicious HTML like crafted IE-specific dynamic expressions like &#101;xpression.... If this is too harsh, admins can allow CSS expressions through htmLawed core but then use a custom function through the hook_tag parameter (section 3.4.9) to more specifically identify CSS expressions in the style attribute values. Also, using $config["style_pass"], it is possible to have htmLawed pass style attribute values without even looking at them (section 3.4.8).
    +
    +  *  htmLawed does not correct certain possible attribute-based security vulnerabilities (e.g., <a href="http://x%22+style=%22background-image:xss">x</a>). These arise when browsers mis-identify markup in escaped text, defeating the very purpose of escaping text (a bad browser will read the given example as <a href="http://x" style="background-image:xss">x</a>).
    +
    +  *  Because of poor Unicode support in PHP, htmLawed does not remove the high value HTML-invalid characters with multi-byte code-points. Such characters, however, are extremely unlikely to be in the input (see section 3.1).
    +
    +  *  htmLawed does not check or correct the character encoding of the input it receives. In conjunction with permissive circumstances, such as when the character encoding is left undefined through HTTP headers or HTML meta tags, this can permit an exploit (like Google's UTF-7/XSS vulnerability of the past). Also, htmLawed can mangle input text if it is not well-formed in terms of character encoding. Administrators can consider using code available elsewhere to check well-formedness of input text characters to correct any defect.
    +
    +  *  htmLawed is expected to work with input texts in ASCII-compatible single byte encodings such as national variants of ASCII (like ISO-646-DE/German of the ISO 646 standard), extended ASCII variants (like ISO 8859-9/Turkish of the ISO 8859/ISO Latin standard), ISO 8859-based Windows variants (like Windows 1252), EBCDIC, Shift JIS (Japanese), GB-Roman (Chinese), and KS-Roman (Korean). It should also properly handle texts with variable byte encodings like UTF-7 (Unicode) and UTF-8 (Unicode). However, htmLawed may mangle input texts with double byte encodings like UTF-16 (Unicode), JIS X 0208:1997 (Japanese) and KS X 1001:1992 (Korean), or the UTF-32 (Unicode) quadruple byte encoding. If an input text has such an encoding, administrators can use PHP's iconv functions, or some other means, to convert text to UTF-8 before passing it to htmLawed.
    +
    +  *  Like any script using PHP's PCRE regex functions, PHP setup-specific low PCRE limit values can cause htmLawed to at least partially fail with very long input texts.
    + +
    +

    +2.9  Examples of usage +

    (to top)
    +
    +  Safest, allowing only safe HTML markup --
    +
    + +    $config = array('safe'=>1); +
    + +    $out = htmLawed($in, $config); +
    +
    +  Simplest, allowing all valid HTML markup except javascript: --
    +
    + +    $out = htmLawed($in); +
    +
    +  Allowing all valid HTML markup including javascript: --
    +
    + +    $config = array('schemes'=>'*:*'); +
    + +    $out = htmLawed($in, $config); +
    +
    +  Allowing only safe HTML and the elements a, em, and strong --
    +
    + +    $config = array('safe'=>1, 'elements'=>'a, em, strong'); +
    + +    $out = htmLawed($in, $config); +
    +
    +  Not allowing elements script and object --
    +
    + +    $config = array('elements'=>'* -script -object'); +
    + +    $out = htmLawed($in, $config); +
    +
    +  Not allowing attributes id and style --
    +
    + +    $config = array('deny_attribute'=>'id, style'); +
    + +    $out = htmLawed($in, $config); +
    +
    +  Permitting only attributes title and href --
    +
    + +    $config = array('deny_attribute'=>'* -title -href'); +
    + +    $out = htmLawed($in, $config); +
    +
    +  Remove bad/disallowed tags altogether instead of converting them to entities --
    +
    + +    $config = array('keep_bad'=>0); +
    + +    $out = htmLawed($in, $config); +
    +
    +  Allowing attribute title only in a and not allowing attributes id, style, or scriptable on* attributes like onclick --
    +
    + +    $config = array('deny_attribute'=>'title, id, style, on*'); +
    + +    $spec = 'a=title'; +
    + +    $out = htmLawed($in, $config, $spec); +
    +
    +  Allowing a custom attribute, vFlag, in img and permitting custom use of the standard attribute, rel, in input --
    +
    + +    $spec = 'img=vFlag; input=rel'; +
    + +    $out = htmLawed($in, $config, $spec); +
    +
    +  Some case-studies are presented below.
    +
    1. A blog administrator wants to allow only a, em, strike, strong and u in comments, but needs strike and u transformed to span for better XHTML 1-strict compliance, and, he wants the a links to point only to http or https resources:
    +
    + +    $processed = htmLawed($in, array('elements'=>'a, em, strike, strong, u', 'make_tag_strict'=>1, 'safe'=>1, 'schemes'=>'*:http, https'), 'a=href'); +
    +
    2. An author uses a custom-made web application to load content on his web-site. He is the only one using that application and the content he generates has all types of HTML, including scripts. The web application uses htmLawed primarily as a tool to correct errors that creep in while writing HTML and to take care of the occasional bad characters in copy-paste text introduced by Microsoft Office. The web application provides a preview before submitted input is added to the content. For the previewing process, htmLawed is set up as follows:
    +
    + +    $processed = htmLawed($in, array('css_expression'=>1, 'keep_bad'=>1, 'make_tag_strict'=>1, 'schemes'=>'*:*', 'valid_xhtml'=>1)); +
    +
    +  For the final submission process, keep_bad is set to 6. A value of 1 for the preview process allows the author to note and correct any HTML mistake without losing any of the typed text.
    +
    3. A data-miner is scraping information in a specific table of similar web-pages and is collating the data rows, and uses htmLawed to reduce unnecessary markup and white-spaces:
    +
    + +    $processed = htmLawed($in, array('elements'=>'tr, td', 'tidy'=>-1), 'tr, td ='); +
    + +
    +
    +

    +3  Details +

    (to top)
    +

    +3.1  Invalid/dangerous characters +

    (to top)
    +
    +  Valid characters (more correctly, their code-points) in HTML or XML are, hexadecimally, 9, a, d, 20 to d7ff, and e000 to 10ffff, except fffe and ffff (decimally, 9, 10, 13, 32 to 55295, and 57344 to 1114111, except 65534 and 65535). htmLawed removes the invalid characters 0 to 8, b, c, and e to 1f.
    +
    +  Because of PHP's poor native support for multi-byte characters, htmLawed cannot check for the remaining invalid code-points. However, for various reasons, it is very unlikely for any of those characters to be in the input.
    +
    +  Characters that are discouraged (see section 5.1) but not invalid are not removed by htmLawed.
    +
    +  It (function hl_tag()) also replaces the potentially dangerous (in some Mozilla [Firefox] and Opera browsers) soft-hyphen character (code-point, hexadecimally, ad, or decimally, 173) in attribute values with spaces. Where required, the characters <, >, &, and " are converted to entities.
    +
    +  With $config["clean_ms_char"] set as 1 or 2, many of the discouraged characters (decimal code-points 127 to 159 except 133) that many Microsoft applications incorrectly use (as per the Windows 1252 [Cp-1252] or a similar encoding system), and the character for decimal code-point 133, are converted to appropriate decimal numerical entities (or removed for a few cases)-- see appendix in section 5.4. This can help avoid some display issues arising from copying-pasting of content.
    +
    +  With $config["clean_ms_char"] set as 2, characters for the hexadecimal code-points 82, 91, and 92 (for special single-quotes), and 84, 93, and 94 (for special double-quotes) are converted to ordinary single and double quotes respectively and not to entities.
    +
    +  The character values are replaced with entities/characters and not character values referred to by the entities/characters to keep this task independent of the character-encoding of input text.
    +
    +  The $config["clean_ms_char"] parameter should not be used if authors do not copy-paste Microsoft-created text, or if the input text is not believed to use the Windows 1252 (Cp-1252) or a similar encoding like Cp-1251 (otherwise, for example when UTF-8 encoding is in use, Japanese or Korean characters can get mangled). Further, the input form and the web-pages displaying it or its content should have the character encoding appropriately marked-up.
    + +
    +

    +3.2  Character references/entities +

    (to top)
    +
    +  Valid character entities take the form &*; where * is #x followed by a hexadecimal number (hexadecimal numeric entity; like &#xA0; for non-breaking space), or alphanumeric like gt (external or named entity; like &nbsp; for non-breaking space), or # followed by a number (decimal numeric entity; like &#160; for non-breaking space). Character entities referring to the soft-hyphen character (the &shy; or \xad character; hexadecimal code-point ad [decimal 173]) in URL-accepting attribute values are always replaced with spaces; soft-hyphens in attribute values introduce vulnerabilities in some older versions of the Opera and Mozilla [Firefox] browsers.
    +
    +  htmLawed (function hl_ent()):
    +
    +  *  Neutralizes entities with multiple leading zeroes or missing semi-colons (potentially dangerous)
    +
    +  *  Lowercases the X (for XML-compliance) and A-F of hexadecimal numeric entities
    +
    +  *  Neutralizes entities referring to characters that are HTML-invalid (see section 3.1)
    +
    +  *  Neutralizes entities referring to characters that are HTML-discouraged (code-points, hexadecimally, 7f to 84, 86 to 9f, and fdd0 to fddf, or decimally, 127 to 132, 134 to 159, and 64976 to 64991). Entities referring to the remaining discouraged characters (see section 5.1 for a full list) are let through.
    +
    +  *  Neutralizes named entities that are not in the specs.
    +
    +  *  Optionally converts valid HTML-specific named entities except &gt;, &lt;, &quot;, and &amp; to decimal numeric ones (hexadecimal if $config["hexdec_entity"] is 2) for generic XML-compliance. For this, $config["named_entity"] should be 1.
    +
    +  *  Optionally converts hexadecimal numeric entities to the more widely supported decimal ones. For this, $config["hexdec_entity"] should be 0.
    +
    +  *  Optionally converts decimal numeric entities to the hexadecimal ones. For this, $config["hexdec_entity"] should be 2.
    +
    Neutralization refers to the entitification of & to &amp;.
    +
    Note: htmLawed does not convert entities to the actual characters represented by them; one can pass the htmLawed output through PHP's html_entity_decode function for that.
    +
    Note: If $config["and_mark"] is set, and set to a value other than 0, then the & characters in the original input are replaced with the control character for the hexadecimal code-point 6 (\x06; & characters introduced by htmLawed, e.g., after converting < to &lt;, are not affected). This allows one to distinguish, say, an &gt; introduced by htmLawed and an &gt; put in by the input writer, and can be helpful in further processing of the htmLawed-processed text (e.g., to identify the character sequence o(><)o to generate an emoticon image). When this feature is active, admins should ensure that the htmLawed output is not directly used in web pages or XML documents as the presence of the \x06 can break documents. Before use in such documents, and preferably before any storage, any remaining \x06 should be changed back to &, e.g., with:
    +
    + +    $final = str_replace("\x06", '&', $prelim); +
    +
    +  Also, see section 3.9.
    + +
    +

    +3.3  HTML elements +

    (to top)
    +
    +  htmLawed can be configured to allow only certain HTML elements (tags) in the input. Disallowed elements (just tag-content, and not element-content), based on $config["keep_bad"], are either neutralized (converted to plain text by entitification of < and >) or removed.
    +
    +  E.g., with only em permitted:
    +
    +  Input:
    +
    + +      <em>My</em> website is <a href="http://a.com>a.com</a>. +
    +
    +  Output, with $config["keep_bad"] = 0:
    +
    + +      <em>My</em> website is a.com. +
    +
    +  Output, with $config["keep_bad"] not 0:
    +
    + +      <em>My</em> website is &lt;a href=""&gt;a.com&lt;/a&gt;. +
    +
    +  See section 3.3.3 for differences between the various non-zero $config["keep_bad"] values.
    +
    +  htmLawed by default permits these 86 elements:
    +
    + +    a, abbr, acronym, address, applet, area, b, bdo, big, blockquote, br, button, caption, center, cite, code, col, colgroup, dd, del, dfn, dir, div, dl, dt, em, embed, fieldset, font, form, h1, h2, h3, h4, h5, h6, hr, i, iframe, img, input, ins, isindex, kbd, label, legend, li, map, menu, noscript, object, ol, optgroup, option, p, param, pre, q, rb, rbc, rp, rt, rtc, ruby, s, samp, script, select, small, span, strike, strong, sub, sup, table, tbody, td, textarea, tfoot, th, thead, tr, tt, u, ul, var +
    +
    +  Except for embed (included because of its wide-spread use) and the Ruby elements (rb, rbc, rp, rt, rtc, ruby; part of XHTML 1.1), these are all the elements in the HTML 4/XHTML 1 specs. Strict-specific specs. exclude center, dir, font, isindex, menu, s, strike, and u.
    +
    +  With $config["safe"] = 1, the default set will exclude applet, embed, iframe, object and script; see section 3.6.
    +
    +  When $config["elements"], which specifies allowed elements, is properly defined, and neither empty nor set to 0 or *, the default set is not used. To have elements added to or removed from the default set, a +/- notation is used. E.g., *-script-object implies that only script and object are disallowed, whereas *+noembed means that noembed is also allowed. Elements can also be specified as comma separated names. E.g., a, b, i means only a, b and i are permitted. In this notation, *, + and - have no significance and can actually cause a mis-reading.
    +
    +  Some more examples of $config["elements"] values indicating permitted elements (note that empty spaces are liberally allowed for clarity):
    +
    +  *  a, blockquote, code, em, strong -- only a, blockquote, code, em, and strong
    +  *  *-script -- all excluding script
    +  *  * -center -dir -font -isindex -menu -s -strike -u -- only XHTML-Strict elements
    +  *  *+noembed-script -- all including noembed excluding script
    +
    +  Some mis-usages (and the resulting permitted elements) that can be avoided:
    +
    +  *  -* -- none; instead of htmLawed, one might just use, e.g., the htmlspecialchars() PHP function
    +  *  *, -script -- all except script; admin probably meant *-script
    +  *  -*, a, em, strong -- all; admin probably meant a, em, strong
    +  *  * -- all; admin need not have set elements
    +  *  *-form+form -- all; a + will always over-ride any -
    +  *  *, noembed -- only noembed; admin probably meant *+noembed
    +  *  a, +b, i -- only a and i; admin probably meant a, b, i
    +
    +  Basically, when using the +/- notation, commas (,) should not be used, and vice versa, and * should be used with the former but not the latter.
    +
    Note: Even if an element that is not in the default set is allowed through $config["elements"], like noembed in the last example, it will eventually be removed during tag balancing unless such balancing is turned off ($config["balance"] set to 0). Currently, the only way around this, which actually is simple, is to edit the various arrays in the function hl_bal() to accommodate the element and its nesting properties.
    +
    A possible second way to specify allowed elements is to set $config["parent"] to an element name that supposedly will hold the input, and to set $config["balance"] to 1. During tag balancing (see section 3.3.3), all elements that cannot legally nest inside the parent element will be removed. The parent element is auto-reset to div if $config["parent"] is empty, body, or an element not in htmLawed's default set of 86 elements.
    +
    Tag transformation is possible for improving XHTML-Strict compliance -- most of the deprecated elements are removed or converted to valid XHTML-Strict ones; see section 3.3.2.
    + +

    +3.3.1  Handling of comments and CDATA sections +

    (to top)
    +
    CDATA sections have the format <![CDATA[...anything but not "]]>"...]]>, and HTML comments, <!--...anything but not "-->"... -->. Neither HTML comments nor CDATA sections can reside inside tags. HTML comments can exist anywhere else, but CDATA sections can exist only where plain text is allowed (e.g., immediately inside td element content but not immediately inside tr element content).
    +
    +  htmLawed (function hl_cmtcd()) handles HTML comments or CDATA sections depending on the values of $config["comment"] or $config["cdata"]. If 0, such markup is not looked for and the text is processed like plain text. If 1, it is removed completely. If 2, it is preserved but any <, > and & inside are changed to entities. If 3, they are left as such.
    +
    +  Note that for the last two cases, HTML comments and CDATA sections will always be removed from tag content (function hl_tag()).
    +
    +  Examples:
    +
    +  Input:
    + +    <!-- home link --><a href="home.htm"><![CDATA[x=&y]]>Home</a> +
    +  Output ($config["comment"] = 0, $config["cdata"] = 2):
    + +    &lt;!-- home link --&gt;<a href="home.htm"><![CDATA[x=&amp;y]]>Home</a> +
    +  Output ($config["comment"] = 1, $config["cdata"] = 2):
    + +    <a href="home.htm"><![CDATA[x=&amp;y]]>Home</a> +
    +  Output ($config["comment"] = 2, $config["cdata"] = 2):
    + +    <!-- home link --><a href="home.htm"><![CDATA[x=&amp;y]]>Home</a> +
    +  Output ($config["comment"] = 2, $config["cdata"] = 1):
    + +    <!-- home link --><a href="home.htm">Home</a> +
    +  Output ($config["comment"] = 3, $config["cdata"] = 3):
    + +    <!-- home link --><a href="home.htm"><![CDATA[x=&y]]>Home</a> +
    +
    +  For standard-compliance, comments are given the form <!--comment -->, and any -- in the content is made -.
    +
    +  When $config["safe"] = 1, CDATA sections and comments are considered plain text unless $config["comment"] or $config["cdata"] is explicitly specified; see section 3.6.
    + +
    +

    +3.3.2  Tag-transformation for better XHTML-Strict +

    (to top)
    +
    +  If $config["make_tag_strict"] is set and not 0, the following non-XHTML-Strict elements (and attributes), even if admin-permitted, are mutated as indicated (element content remains intact; function hl_tag2()):
    +
    +  *  applet - (based on $config["make_tag_strict"], unchanged (1) or removed (2))
    +  *  center - div style="text-align: center;"
    +  *  dir - ul
    +  *  embed - (based on $config["make_tag_strict"], unchanged (1) or removed (2))
    +  *  font (face, size, color) -    span style="font-family: ; font-size: ; color: ;" (size transformation reference)
    +  *  isindex - (based on $config["make_tag_strict"], unchanged (1) or removed (2))
    +  *  menu - ul
    +  *  s - span style="text-decoration: line-through;"
    +  *  strike - span style="text-decoration: line-through;"
    +  *  u - span style="text-decoration: underline;"
    +
    +  For an element with a pre-existing style attribute value, the extra style properties are appended.
    +
    +  Example input:
    +
    + +    <center> +
    + +     The PHP <s>software</s> script used for this <strike>web-page</strike> web-page is <font style="font-weight: bold " face=arial size='+3' color   =  "red  ">htmLawedTest.php</font>, from <u style= 'color:green'>PHP Labware</u>. +
    + +    </center> +
    +
    +  The output:
    +
    + +    <div style="text-align: center;"> +
    + +     The PHP <span style="text-decoration: line-through;">software</span> script used for this <span style="text-decoration: line-through;">web-page</span> web-page is <span style="font-weight: bold; font-family: arial; color: red; font-size: 200%;">htmLawedTest.php</span>, from <span style="color:green; text-decoration: underline;">PHP Labware</span>. +
    + +    </div> +
    + +
    +

    +3.3.3  Tag balancing and proper nesting +

    (to top)
    +
    +  If $config["balance"] is set to 1, htmLawed (function hl_bal()) checks and corrects the input to have properly balanced tags and legal element content (i.e., any element nesting should be valid, and plain text may be present only in the content of elements that allow them).
    +
    +  Depending on the value of $config["keep_bad"] (see section 2.2 and section 3.3), illegal content may be removed or neutralized to plain text by converting < and > to entities:
    +
    0 - remove; this option is available only to maintain Kses-compatibility and should not be used otherwise (see section 2.6)
    1 - neutralize tags and keep element content
    2 - remove tags but keep element content
    3 and 4 - like 1 and 2, but keep element content only if text (pcdata) is valid in parent element as per specs
    5 and 6 -  like 3 and 4, but line-breaks, tabs and spaces are left
    +
    +  Example input (disallowing the p element):
    +
    + +    <*> Pseudo-tags <*> +
    + +    <xml>Non-HTML tag xml</xml> +
    + +    <p> +
    + +    Disallowed tag p +
    + +    </p> +
    + +    <ul>Bad<li>OK</li></ul> +
    +
    +  The output with $config["keep_bad"] = 1:
    +
    + +    &lt;*&gt; Pseudo-tags &lt;*&gt; +
    + +    &lt;xml&gt;Non-HTML tag xml&lt;/xml&gt; +
    + +    &lt;p&gt; +
    + +    Disallowed tag p +
    + +    &lt;/p&gt; +
    + +    <ul>Bad<li>OK</li></ul> +
    +
    +  The output with $config["keep_bad"] = 3:
    +
    + +    &lt;*&gt; Pseudo-tags &lt;*&gt; +
    + +    &lt;xml&gt;Non-HTML tag xml&lt;/xml&gt; +
    + +    &lt;p&gt; +
    + +    Disallowed tag p +
    + +    &lt;/p&gt; +
    + +    <ul><li>OK</li></ul> +
    +
    +  The output with $config["keep_bad"] = 6:
    +
    + +    &lt;*&gt; Pseudo-tags &lt;*&gt; +
    + +    Non-HTML tag xml +
    +
    + +    Disallowed tag p +
    +
    + +    <ul><li>OK</li></ul> +
    +
    +  An option like 1 is useful, e.g., when a writer previews his submission, whereas one like 3 is useful before content is finalized and made available to all.
    +
    Note: In the example above, unlike <*>, <xml> gets considered as a tag (even though there is no HTML element named xml). Thus, the keep_bad parameter's value affects <xml> but not <*>. In general, text matching the regular expression pattern <(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?> is considered a tag (phrase enclosed by the angled brackets < and >, and starting [with an optional slash preceding] with an alphanumeric word that starts with a letter...), and is subjected to the keep_bad value.
    +
    +  Nesting/content rules for each of the 86 elements in htmLawed's default set (see section 3.3) are defined in function hl_bal(). This means that if a non-standard element besides embed is being permitted through $config["elements"], the element's tag content will end up getting removed if $config["balance"] is set to 1.
    +
    +  Plain text and/or certain elements nested inside blockquote, form, map and noscript need to be in block-level elements. This point is often missed during manual writing of HTML code. htmLawed attempts to address this during balancing. E.g., if the parent container is set as form, the input B:<input type="text" value="b" />C:<input type="text" value="c" /> is converted to <div>B:<input type="text" value="b" />C:<input type="text" value="c" /></div>.
    + +
    +

    +3.3.4  Elements requiring child elements +

    (to top)
    +
    +  As per specs, the following elements require legal child elements nested inside them:
    +
    + +    blockquote, dir, dl, form, map, menu, noscript, ol, optgroup, rbc, rtc, ruby, select, table, tbody, tfoot, thead, tr, ul +
    +
    +  In some cases, the specs stipulate the number and/or the ordering of the child elements. A table can have 0 or 1 caption, tbody, tfoot, and thead, but they must be in this order: caption, thead, tfoot, tbody.
    +
    +  htmLawed currently does not check for conformance to these rules. Note that any non-compliance in this regard will not introduce security vulnerabilities, crash browser applications, or affect the rendering of web-pages.
    +
    +  With $config["direct_list_nest"] set to 1, htmLawed will allow direct nesting of an ol or ul list within another ol or ul without requiring the child list to be within an li of the parent list. While this is not standard-compliant, directly nested lists are rendered properly by almost all browsers. The parameter $config["direct_list_nest"] has no effect if tag-balancing (section 3.3.3) is turned off.
    + +
    +

    +3.3.5  Beautify or compact HTML +

    (to top)
    +
    +  By default, htmLawed will neither beautify HTML code by formatting it with indentations, etc., nor will it make it compact by removing un-needed white-space. (It does always properly white-space tag content.)
    +
    +  As per the HTML standards, spaces, tabs and line-breaks in web-pages (except those inside pre elements) are all considered equivalent, and referred to as white-spaces. Browser applications are supposed to consider contiguous white-spaces as just a single space, and to disregard white-spaces trailing opening tags or preceding closing tags. This white-space normalization allows the use of text/code beautifully formatted with indentations and line-spacings for readability. Such pretty HTML can, however, increase the size of web-pages, or make the extraction or scraping of plain text cumbersome.
    +
    +  With the $config parameter tidy, htmLawed can be used to beautify or compact the input text. Input with just plain text and no HTML markup is also subject to this. Besides pre, the script and textarea elements, CDATA sections, and HTML comments are not subjected to the tidying process.
    +
    +  To compact, use $config["tidy"] = -1; single instances or runs of white-spaces are replaced with a single space, and white-spaces trailing and leading open and closing tags, respectively, are removed.
    +
    +  To beautify, $config["tidy"] is set as 1, or for customized tidying, as a string like 2s2n. The s or t character specifies the use of spaces or tabs for indentation. The first and third characters, any of the digits 0-9, specify the number of spaces or tabs per indentation, and any parental lead spacing (extra indenting of the whole block of input text). The r and n characters are used to specify line-break characters: n for \n (Unix/Mac OS X line-breaks), rn or nr for \r\n (Windows/DOS line-breaks), or r for \r.
    +
    +  The $config["tidy"] value of 1 is equivalent to 2s0n. Other $config["tidy"] values are read loosely: a value of 4 is equivalent to 4s0n; t2, to 1t2n; s, to 2s0n; 2TR, to 2t0r; T1, to 1t1n; nr3, to 3s0nr, and so on. Except in the indentations and line-spacings, runs of white-spaces are replaced with a single space during beautification.
    +
    +  Input formatting using $config["tidy"] is not recommended when input text has mixed markup (like HTML + PHP).
    + +
    +
    +

    +3.4  Attributes +

    (to top)
    +
    +  htmLawed will only permit attributes described in the HTML specs (including deprecated ones). It also permits some attributes for use with the embed element (the non-standard embed element is supported in htmLawed because of its widespread use), and the xml:space attribute (valid only in XHTML 1.1). A list of such 111 attributes and the elements they are allowed in is in section 5.2. Using the $spec argument, htmLawed can be forced to permit custom, non-standard attributes as well as custom rules for standard attributes (section 2.3).
    +
    +  When $config["deny_attribute"] is not set, or set to 0, or empty (""), all the 111 attributes are permitted. Otherwise, $config["deny_attribute"] can be set as a list of comma-separated names of the denied attributes. on* can be used to refer to the group of potentially dangerous, script-accepting attributes: onblur, onchange, onclick, ondblclick, onfocus, onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, onreset, onselect and onsubmit.
    +
    +  Note that attributes specified in $config["deny_attribute"] are denied globally, for all elements. To deny attributes for only specific elements, $spec (see section 2.3) can be used. $spec can also be used to element-specifically permit an attribute otherwise denied through $config["deny_attribute"].
    +
    +  With $config["safe"] = 1 (section 3.6), the on* attributes are automatically disallowed.
    +
    Note: To deny all but a few attributes globally, a simpler way to specify $config["deny_attribute"] would be to use the notation * -attribute1 -attribute2 .... Thus, a value of * -title -href implies that except href and title (where allowed as per standards) all other attributes are to be removed. With this notation, the value for the parameter safe (section 3.6) will have no effect on deny_attribute.
    +
    +  htmLawed (function hl_tag()) also:
    +
    +  *  Lower-cases attribute names
    +  *  Removes duplicate attributes (last one stays)
    +  *  Gives attributes the form name="value" and single-spaces them, removing unnecessary white-spacing
    +  *  Provides required attributes (see section 3.4.1)
    +  *  Double-quotes values and escapes any " inside them
    +  *  Replaces the possibly dangerous soft-hyphen characters (hexadecimal code-point ad) in the values with spaces
    +  *  Allows custom function to additionally filter/modify attribute values (see section 3.4.9)
    + +

    +3.4.1  Auto-addition of XHTML-required attributes +

    (to top)
    +
    +  If indicated attributes for the following elements are found missing, htmLawed (function hl_tag()) will add them (with values same as attribute names unless indicated otherwise below):
    +
    +  *  area - alt (area)
    +  *  area, img - src, alt (image)
    +  *  bdo - dir (ltr)
    +  *  form - action
    +  *  map - name
    +  *  optgroup - label
    +  *  param - name
    +  *  script - type (text/javascript)
    +  *  textarea - rows (10), cols (50)
    +
    +  Additionally, with $config["xml:lang"] set to 1 or 2, if the lang but not the xml:lang attribute is declared, then the latter is added too, with a value copied from that of lang. This is for better standard-compliance. With $config["xml:lang"] set to 2, the lang attribute is removed (XHTML 1.1 specs).
    +
    +  Note that the name attribute for map, invalid in XHTML 1.1, is also transformed if required -- see section 3.4.6.
    + +
    +

    +3.4.2  Duplicate/invalid id values +

    (to top)
    +
    +  If $config["unique_ids"] is 1, htmLawed (function hl_tag()) removes id attributes with values that are not XHTML-compliant (must begin with a letter and can contain letters, digits, :, ., - and _) or duplicate. If $config["unique_ids"] is a word, any duplicate but otherwise valid value will be appropriately prefixed with the word to ensure its uniqueness. The word should begin with a letter and should contain only letters, numbers, :, ., _ and -.
    +
    +  Even if multiple inputs need to be filtered (through multiple calls to htmLawed), htmLawed ensures uniqueness of id values as it uses a global variable ($GLOBALS["hl_Ids"] array). Further, an admin can restrict the use of certain id values by presetting this variable before htmLawed is called into use. E.g.:
    +
    + +    $GLOBALS['hl_Ids'] = array('top'=>1, 'bottom'=>1, 'myform'=>1); // id values not allowed in input +
    + +    $processed = htmLawed($text); // filter input +
    + +
    +

    +3.4.3  URL schemes (protocols) and scripts in attribute values +

    (to top)
    +
    +  htmLawed edits attributes that take URLs as values if they are found to contain un-permitted schemes. E.g., if the afp scheme is not permitted, then <a href="afp://domain.org"> becomes <a href="denied:afp://domain.org">, and if Javascript is not permitted <a onclick="javascript:xss();"> becomes <a onclick="denied:javascript:xss();">.
    +
    +  By default htmLawed permits these schemes in URLs for the href attribute:
    +
    + +    aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet +
    +
    +  Also, only file, http and https are permitted in attributes whose names start with o (like onmouseover), and in these attributes that accept URLs:
    +
    + +    action, cite, classid, codebase, data, href, longdesc, model, pluginspage, pluginurl, src, style, usemap +
    +
    +  These default sets are used when $config["schemes"] is not set (see section 2.2). To over-ride the defaults, $config["schemes"] is defined as a string of semi-colon-separated sub-strings of type attribute: comma-separated schemes. E.g., href: mailto, http, https; onclick: javascript; src: http, https. For unspecified attributes, file, http and https are permitted. This can be changed by passing schemes for * in $config["schemes"]. E.g., href: mailto, http, https; *: http, https.
    +
    * can be put in the list of schemes to permit all protocols. E.g., style: *; img: http, https results in protocols not being checked in style attribute values. However, in such cases, any relative-to-absolute URL conversion, or vice versa, (section 3.4.4) is not done.
    +
    +  Thus, to allow Javascript, one can set $config["schemes"] as href: mailto, http, https; *: http, https, javascript, or href: mailto, http, https, javascript; *: http, https, javascript, or *: *, and so on.
    +
    +  As a side-note, one may find style: * useful as URLs in style attributes can be specified in a variety of ways, and the patterns that htmLawed uses to identify URLs may mistakenly identify non-URL text.
    +
    ! can be put in the list of schemes to disallow all protocols as well as local URLs. Thus, with href: http, style: !, '<a href="http://cnn.com" style="background-image: url('local.jpg');">CNN</a>' will become '<a href="http://cnn.com" style="background-image: url('denied:local.jpg');">CNN</a>'.
    +
    Note: If URL-accepting attributes other than those listed above are being allowed, then the scheme will not be checked unless the attribute name contains the string src (e.g., dynsrc) or starts with o (e.g., onbeforecopy).
    +
    +  With $config["safe"] = 1, all URLs are disallowed in the style attribute values.
    + +
    +

    +3.4.4  Absolute & relative URLs in attribute values +

    (to top)
    +
    +  htmLawed can make absolute URLs in attributes like href relative ($config["abs_url"] is -1), and vice versa ($config["abs_url"] is 1). URLs in scripts are not considered for this, and neither are URLs like #section_6 (fragment), ?name=Tim#show (starting with query string), and ;var=1?name=Tim#show (starting with parameters). Further, this requires that $config["base_url"] be set properly, with the :// and a trailing slash (/), with no query string, etc. E.g., file:///D:/page/, https://abc.com/x/y/, or http://localhost/demo/ are okay, but file:///D:/page/?help=1, abc.com/x/y/ and http://localhost/demo/index.htm are not.
    +
    +  For making absolute URLs relative, only those URLs that have the $config["base_url"] string at the beginning are converted. E.g., with $config["base_url"] = "https://abc.com/x/y/", https://abc.com/x/y/a.gif and https://abc.com/x/y/z/b.gif become a.gif and z/b.gif respectively, while https://abc.com/x/c.gif is not changed.
    +
    +  When making relative URLs absolute, only values for scheme, network location (host-name) and path values in the base URL are inherited. See section 5.5 for more about the URL specification as per RFC 1808.
    + +
    +

    +3.4.5  Lower-cased, standard attribute values +

    (to top)
    +
    +  Optionally, for standard-compliance, htmLawed (function hl_tag()) lower-cases standard attribute values to give, e.g., input type="password" instead of input type="Password", if $config["lc_std_val"] is 1. Attribute values matching those listed below for any of the elements (plus those for the type attribute of button or input) are lower-cased:
    +
    + +    all, baseline, bottom, button, center, char, checkbox, circle, col, colgroup, cols, data, default, file, get, groups, hidden, image, justify, left, ltr, middle, none, object, password, poly, post, preserve, radio, rect, ref, reset, right, row, rowgroup, rows, rtl, submit, text, top +
    +
    + +    a, area, bdo, button, col, form, img, input, object, option, optgroup, param, script, select, table, td, tfoot, th, thead, tr, xml:space +
    +
    +  The following empty (minimized) attributes are always assigned lower-cased values (same as the names):
    +
    + +    checked, compact, declare, defer, disabled, ismap, multiple, nohref, noresize, noshade, nowrap, readonly, selected +
    + +
    +

    +3.4.6  Transformation of deprecated attributes +

    (to top)
    +
    +  If $config["no_deprecated_attr"] is not 0 (i.e., 1 or 2), then deprecated attributes (see appendix in section 5.2) are removed and, in most cases, their values are transformed to CSS style properties and added to the style attributes (function hl_tag()). Except for bordercolor for table, tr and td, the scores of proprietary attributes that were never part of any cross-browser standard are not supported.
    +
    Note: The attribute target for a is allowed even though it is not in XHTML 1.0 specs. This is because of the attribute's wide-spread use and browser-support, and because the attribute is valid in XHTML 1.1 onwards.
    +
    +  *  align - for img with value of left or right, becomes, e.g., float: left; for div and table with value center, becomes margin: auto; all others become, e.g., text-align: right
    +
    +  *  bgcolor - E.g., bgcolor="#ffffff" becomes background-color: #ffffff
    +  *  border - E.g., border="1" becomes border: 1px
    +  *  bordercolor - E.g., bordercolor=#999999 becomes border-color: #999999;
    +  *  compact - font-size: 85%
    +  *  clear - E.g., clear="all" becomes clear: both
    +
    +  *  height - E.g., height= "10" becomes height: 10px and height="*" becomes height: auto
    +
    +  *  hspace - E.g., hspace="10" becomes margin-left: 10px; margin-right: 10px
    +  *  language - language="VBScript" becomes type="text/vbscript"
    +  *  name - E.g., name="xx" becomes id="xx"
    +  *  noshade - border-style: none; border: 0; background-color: gray; color: gray
    +  *  nowrap - white-space: nowrap
    +  *  size - E.g., size="10" becomes height: 10px
    +  *  start - removed
    +  *  type - E.g., type="i" becomes list-style-type: lower-roman
    +  *  value - removed
    +  *  vspace - E.g., vspace="10" becomes margin-top: 10px; margin-bottom: 10px
    +  *  width - like height
    +
    +  Example input:
    +
    + +    <img src="j.gif" alt="image" name="dad's" /><img src="k.gif" alt="image" id="dad_off" name="dad" /> +
    + +    <br clear="left" /> +
    + +    <hr noshade size="1" /> +
    + +    <img name="img" src="i.gif" align="left" alt="image" hspace="10" vspace="10" width="10em" height="20" border="1" style="padding:5px;" /> +
    + +    <table width="50em" align="center" bgcolor="red"> +
    + +     <tr> +
    + +      <td width="20%"> +
    + +       <div align="center"> +
    + +        <h3 align="right">Section</h3> +
    + +        <p align="right">Para</p> +
    + +        <ol type="a" start="e"><li value="x">First item</li></ol> +
    + +       </div> +
    + +      </td> +
    + +      <td width="*"> +
    + +       <ol type="1"><li>First item</li></ol> +
    + +      </td> +
    + +     </tr> +
    + +    </table> +
    + +    <br clear="all" /> +
    +
    +  And the output with $config["no_deprecated_attr"] = 1:
    +
    + +    <img src="j.gif" alt="image" /><img src="k.gif" alt="image" id="dad_off" /> +
    + +    <br style="clear: left;" /> +
    + +    <hr style="border-style: none; border: 0; background-color: gray; color: gray; size: 1px;" /> +
    + +    <img src="i.gif" alt="image" width="10em" height="20" style="padding:5px; float: left; margin-left: 10px; margin-right: 10px; margin-top: 10px; margin-bottom: 10px; border: 1px;" id="img" /> +
    + +    <table width="50em" style="margin: auto; background-color: red;"> +
    + +     <tr> +
    + +      <td style="width: 20%;"> +
    + +       <div style="margin: auto;"> +
    + +        <h3 style="text-align: right;">Section</h3> +
    + +        <p style="text-align: right;">Para</p> +
    + +        <ol style="list-style-type: lower-latin;"><li>First item</li></ol> +
    + +       </div> +
    + +      </td> +
    + +      <td style="width: auto;"> +
    + +       <ol style="list-style-type: decimal;"><li>First item</li></ol> +
    + +      </td> +
    + +     </tr> +
    + +    </table> +
    + +    <br style="clear: both;" /> +
    +
    +  For lang, deprecated in XHTML 1.1, transformation is taken care of through $config["xml:lang"]; see section 3.4.1.
    +
    +  The attribute name is deprecated in form, iframe, and img, and is replaced with id if an id attribute doesn't exist and if the name value is appropriate for id. For such replacements for a and map, for which the name attribute is deprecated in XHTML 1.1, $config["no_deprecated_attr"] should be set to 2 (when set to 1, for these two elements, the name attribute is retained).
    + +
    +

    +3.4.7  Anti-spam & href +

    (to top)
    +
    +  htmLawed (function hl_tag()) can check the href attribute values (link addresses) as an anti-spam (email or link spam) measure.
    +
    +  If $config["anti_mail_spam"] is not 0, the @ of email addresses in href values like mailto:a@b.com is replaced with text specified by $config["anti_mail_spam"]. The text should be of a form that makes it clear to others that the address needs to be edited before a mail is sent; e.g., <remove_this_antispam>@ (makes the example address a<remove_this_antispam>@b.com).
    +
    +  For regular links, one can choose to have a rel attribute with nofollow in its value (which tells some search engines to not follow a link). This can discourage link spammers. Additionally, or as an alternative, one can choose to empty the href value altogether (disable the link).
    +
    +  For use of these options, $config["anti_link_spam"] should be set as an array with values regex1 and regex2, both or one of which can be empty (like array("", "regex2")) to indicate that that option is not to be used. Otherwise, regex1 or regex2 should be PHP- and PCRE-compatible regular expression patterns: href values will be matched against them and those matching the pattern will accordingly be treated.
    +
    +  Note that the regular expressions should have delimiters, and be well-formed and preferably fast. Absolute efficiency/accuracy is often not needed.
    +
    +  An example, to have a rel attribute with nofollow for all links, and to disable links that do not point to domains abc.com and xyz.org:
    +
    + +    $config["anti_link_spam"] = array('`.`', '`://\W*(?!(abc\.com|xyz\.org))`'); +
    + +
    +

    +3.4.8  Inline style properties +

    (to top)
    +
    +  htmLawed can check URL schemes and dynamic expressions (to guard against Javascript, etc., script-based insecurities) in inline CSS style property values in the style attributes. (CSS properties like background-image that accept URLs in their values are noted in section 5.3.) Dynamic CSS expressions that allow scripting in the IE browser, and can be a vulnerability, can be removed from property values by setting $config["css_expression"] to 1 (default setting). Note that when $config["css_expression"] is set to 1, htmLawed will remove /* from the style values.
    +
    Note: Because of the various ways of representing characters in attribute values (URL-escapement, entitification, etc.), htmLawed might alter the values of the style attribute values, and may even falsely identify dynamic CSS expressions and URL schemes in them. If this is an important issue, checking of URLs and dynamic expressions can be turned off ($config["schemes"] = "...style:*...", see section 3.4.3, and $config["css_expression"] = 0). Alternately, admins can use their own custom function for finer handling of style values through the hook_tag parameter (see section 3.4.9).
    +
    +  It is also possible to have htmLawed let through any style value by setting $config["style_pass"] to 1.
    +
    +  As such, it is better to set up a CSS file with class declarations, disallow the style attribute, set a $spec rule (see section 2.3) for class for the oneof or match parameter, and ask writers to make use of the class attribute.
    + +
    +

    +3.4.9  Hook function for tag content +

    (to top)
    +
    +  It is possible to utilize a custom hook function to alter the tag content htmLawed has finalized (i.e., after it has checked/corrected for required attributes, transformed attributes, lower-cased attribute names, etc.).
    +
    +  When $config parameter hook_tag is set to the name of a function, htmLawed (function hl_tag()) will pass on the element name, and, in the case of an opening tag, the finalized attribute name-value pairs as array elements to the function. The function, after completing a task such as filtering or tag transformation, will typically return an empty string, the full opening tag string like <element_name attribute_1_name="attribute_1_value"...> (for empty elements like img and input, the element-closing slash / should also be included), etc.
    +
    +  Any hook_tag function, since htmLawed version 1.1.11, also receives names of elements in closing tags, such as a in the closing </a> tag of the element <a href="http://cnn.com">CNN</a>. Unlike for opening tags, no other value (i.e., the attribute name-value array) is passed to the function since a closing tag contains only element names. Typically, the function will return an empty string or a full closing tag (like </a>).
    +
    +  This is a powerful functionality that can be exploited for various objectives: consolidate-and-convert inline style attributes to class, convert embed elements to object, permit only one caption element in a table element, disallow embedding of certain types of media, inject HTML, use CSSTidy to sanitize style attribute values, etc.
    +
    +  As an example, the custom hook code below can be used to force a series of specifically ordered id attributes on all elements, and a specific param element inside all object elements:
    +
    + +    function my_tag_function($element, $attribute_array=0){ +
    +
    + +      // If second argument is not received, it means a closing tag is being handled +
    + +      if(is_numeric($attribute_array)){ +
    + +        return "</$element>"; +
    + +      } +
    +
    + +      static $id = 0; +
    + +      // Remove any duplicate element +
    + +      if($element == 'param' && isset($attribute_array['allowscriptaccess'])){ +
    + +        return ''; +
    + +      } +
    +
    + +      $new_element = ''; +
    +
    + +      // Force a serialized ID number +
    + +      $attribute_array['id'] = 'my_'. $id; +
    + +      ++$id; +
    +
    + +      // Inject param for allowscriptaccess +
    + +      if($element == 'object'){ +
    + +        $new_element = '<param id="my_'. $id. '" allowscriptaccess="never" />'; +
    + +        ++$id; +
    + +      } +
    +
    + +      $string = ''; +
    + +      foreach($attribute_array as $k=>$v){ +
    + +        $string .= " {$k}=\"{$v}\""; +
    + +      } +
    +
    + +      static $empty_elements = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); +
    +
    + +      return "<{$element}{$string}". (isset($empty_elements[$element]) ? ' /' : ''). '>'. $new_element; +
    + +    } +
    +
    +  The hook_tag parameter is different from the hook parameter (section 3.7).
    +
    +  Snippets of hook function code developed by others may be available on the htmLawed website.
    + +
    +
    +

    +3.5  Simple configuration directive for most valid XHTML +

    (to top)
    +
    +  If $config["valid_xhtml"] is set to 1, some relevant $config parameters (indicated by ~ in section 2.2) are auto-adjusted. This allows one to pass the $config argument with a simpler value. If a value for a parameter auto-set through valid_xhtml is still manually provided, then that value will over-ride the auto-set value.
    + +
    +

    +3.6  Simple configuration directive for most safe HTML +

    (to top)
    +
    Safe HTML refers to HTML that is restricted to reduce the vulnerability for scripting attacks (such as XSS) based on HTML code which otherwise may still be legal and compliant with the HTML standard specs. When elements such as script and object, and attributes such as onmouseover and style are allowed in the input text, an input writer can introduce malevolent HTML code. Note that what is considered safe depends on the nature of the web application and the trust-level accorded to its users.
    +
    +  htmLawed allows an admin to use $config["safe"] to auto-adjust multiple $config parameters (such as elements which declares the allowed element-set), which otherwise would have to be manually set. (The relevant parameters are indicated by " in section 2.2.) Thus, one can pass the $config argument with a simpler value.
    +
    +  With the value of 1, htmLawed considers CDATA sections and HTML comments as plain text, and prohibits the applet, embed, iframe, object and script elements, and the on* attributes like onclick. (There are $config parameters like css_expression that are not affected by the value set for safe but whose default values still contribute towards a safer output.) Further, URLs with schemes (see section 3.4.3) are neutralized so that, e.g., style="moz-binding:url(http://danger)" becomes style="moz-binding:url(denied:http://danger)".
    +
    +  Admins, however, may still want to completely deny the style attribute, e.g., with code like
    +
    + +    $processed = htmLawed($text, array('safe'=>1, 'deny_attribute'=>'style')); +
    +
    +  Permitting the style attribute brings in risks of click-jacking, etc. CSS property values can render a page non-functional or be used to deface it. Except for URLs, dynamic expressions, and some other things, htmLawed does not completely check style values. It does provide ways for the code-developer implementing htmLawed to do such checks through the $spec argument, and through the hook_tag parameter (see section 3.4.8 for more). Disallowing style completely and relying on CSS classes and stylesheet files is recommended.
    +
    +  If a value for a parameter auto-set through safe is still manually provided, then that value can over-ride the auto-set value. E.g., with $config["safe"] = 1 and $config["elements"] = "*+script", script, but not applet, is allowed.
    +
    +  A page illustrating the efficacy of htmLawed's anti-XSS abilities with safe set to 1 against XSS vectors listed by RSnake may be available here.
    + +
    +

    +3.7  Using a hook function +

    (to top)
    +
    +  If $config["hook"] is not set to 0, then htmLawed will allow preliminarily processed input to be altered by a hook function named by $config["hook"] before starting the main work (but after handling of characters, entities, HTML comments and CDATA sections -- see code for function htmLawed()).
    +
    +  The hook function also allows one to alter the finalized values of $config and $spec.
    +
    +  Note that the hook parameter is different from the hook_tag parameter (section 3.4.9).
    +
    +  Snippets of hook function code developed by others may be available on the htmLawed website.
    + +
    +

    +3.8  Obtaining finalized parameter values +

    (to top)
    +
    +  htmLawed can assign the finalized $config and $spec values to a variable named by $config["show_setting"]. The variable, made global by htmLawed, is set as an array with three keys: config, with the $config value, spec, with the $spec value, and time, with a value that is the Unix time (the output of PHP's microtime() function) when the value was assigned. Admins should use a PHP-compliant variable name (e.g., one that does not begin with a numerical digit) that does not conflict with variable names in their non-htmLawed code.
    +
    +  The values, which are also post-hook function (if any), can be used to auto-generate information (on, e.g., the elements that are permitted) for input writers.
    + +
    +

    +3.9  Retaining non-HTML tags in input with mixed markup +

    (to top)
    +
    +  htmLawed does not remove certain characters that, though not invalid, are nevertheless discouraged in HTML documents as per the specifications (see section 5.1). This can be utilized to deal with input that contains mixed markup. Input that may have HTML markup as well as some other markup that is based on the <, > and & characters is considered to have mixed markup. The non-HTML markup can be rather proprietary (like markup for emoticons/smileys), or standard (like MathML or SVG). Or it can be programming code meant for execution/evaluation (such as embedded PHP code).
    +
    +  To deal with such mixed markup, the input text can be pre-processed to hide the non-HTML markup by specifically replacing the <, > and & characters with some of the HTML-discouraged characters (see section 3.1.2). Post-htmLawed processing, the replacements are reverted.
    +
    +  An example (mixed HTML and PHP code in input text):
    +
    + +    $text = preg_replace('`<\?php(.+?)\?>`sm', "\x83?php\\1?\x84", $text); +
    + +    $processed = htmLawed($text); +
    + +    $processed = preg_replace('`\x83\?php(.+?)\?\x84`sm', '<?php$1?>', $processed); +
    +
    +  This code will not work if $config["clean_ms_char"] is set to 1 (section 3.1), in which case one should instead deploy a hook function (section 3.7). (htmLawed internally uses certain control characters, code-points 1 to 7, and use of these characters as markers in the logic of hook functions may cause issues.)
    +
    +  Admins may also be able to use $config["and_mark"] to deal with such mixed markup; see section 3.2.
    + +
    +
    +

    +4  Other +

    (to top)
    +

    +4.1  Support +

    (to top)
    +
    +  A careful reading of this documentation may provide an answer.
    +
    +  Software updates and forum-based community-support may be found at http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed. For general PHP issues (not htmLawed-specific), support may be found through internet searches and at http://php.net.
    + +
    +

    +4.2  Known issues +

    (to top)
    +
    +  See section 2.8.
    + +
    +

    +4.3  Change-log +

    (to top)
    +
    +  (The release date for the downloadable package of files containing documentation, demo script, test-cases, etc., besides the htmLawed.php file, may be updated without a change-log entry if the secondary files, but not htmLawed per se, are revised.)
    +
    Version number - Release date. Notes
    +
    +  1.1.16 - 29 August 2013. Fix for a potential security vulnerability arising from specially encoded space characters in URL schemes/protocols
    +
    +  1.1.15 - 11 August 2013. Improved tidying/prettifying functionality
    +
    +  1.1.14 - 8 August 2012. Fix for possible segmental loss of incremental indentation during tidying when balance is disabled; fix for non-effectuation under some circumstances of a corrective behavior to preserve plain text within elements like blockquote.
    +
    +  1.1.13 - 22 July 2012. Added feature allowing use of custom, non-standard attributes or custom rules for standard attributes
    +
    +  1.1.12 - 5 July 2012. Fix for a bug in identifying an unquoted value of the face attribute
    +
    +  1.1.11 - 5 June 2012. Fix for possible problem with handling of multi-byte characters in attribute values in an mbstring.func_overload environment. $config["hook_tag"], if specified, now receives names of elements in closing tags.
    +
    +  1.1.10 - 22 October 2011. Fix for a bug in the tidy functionality that caused the entire input to be replaced with a single space; new parameter, $config["direct_list_nest"] to allow direct descendance of a list in a list. (5 April 2012. Dual licensing from LGPLv3 to LGPLv3 and GPLv2+.)
    +
    +  1.1.9.5 - 6 July 2011. Minor correction of a rule for nesting of li within dir
    +
    +  1.1.9.4 - 3 July 2010. Parameter schemes now accepts ! so any URL, even a local one, can be denied. An issue in which a second URL value in style properties was not checked was fixed.
    +
    +  1.1.9.3 - 17 May 2010. Checks for correct nesting of param
    +
    +  1.1.9.2 - 26 April 2010. Minor fix regarding rendering of denied URL schemes
    +
    +  1.1.9.1 - 26 February 2010. htmLawed now uses the LGPL version 3 license; support for flashvars attribute for embed
    +
    +  1.1.9 - 22 December 2009. Soft-hyphens are now removed only from URL-accepting attribute values
    +
    +  1.1.8.1 - 16 July 2009. Minor code-change to fix a PHP error notice
    +
    +  1.1.8 - 23 April 2009. Parameter deny_attribute now accepts the wild-card *, making it simpler to specify its value when all but a few attributes are being denied; fixed a bug in interpreting $spec
    +
    +  1.1.7 - 11-12 March 2009. Attributes globally denied through deny_attribute can be allowed element-specifically through $spec; $config["style_pass"] allowing letting through any style value introduced; altered logic to catch certain types of dynamic crafted CSS expressions
    +
    +  1.1.3-6 - 28-31 January - 4 February 2009. Altered logic to catch certain types of dynamic crafted CSS expressions
    +
    +  1.1.2 - 22 January 2009. Fixed bug in parsing of font attributes during tag transformation
    +
    +  1.1.1 - 27 September 2008. Better nesting correction when omitable closing tags are absent
    +
    +  1.1 - 29 June 2008. $config["hook_tag"] and $config["tidy"] introduced for custom tag/attribute check/modification/injection and output compaction/beautification; fixed a regex-in-$spec parsing bug
    +
    +  1.0.9 - 11 June 2008. Fix for a bug in checks for invalid HTML code-point entities
    +
    +  1.0.8 - 15 May 2008. Permit bordercolor attribute for table, td and tr
    +
    +  1.0.7 - 1 May 2008. Support for wmode attribute for embed; $config["show_setting"] introduced; improved $config["elements"] evaluation
    +
    +  1.0.6 - 20 April 2008. $config["and_mark"] introduced
    +
    +  1.0.5 - 12 March 2008. style URL schemes essentially disallowed when $config safe is on; improved regex for CSS expression search
    +
    +  1.0.4 - 10 March 2008. Improved corrections for blockquote, form, map and noscript
    +
    +  1.0.3 - 3 March 2008. Character entities for soft-hyphens are now replaced with spaces (instead of being removed); fix for a bug allowing td directly inside table; $config["safe"] introduced
    +
    +  1.0.2 - 13 February 2008. Improved implementation of $config["keep_bad"]
    +
    +  1.0.1 - 7 November 2007. Improved regex for identifying URLs, protocols and dynamic expressions (hl_tag() and hl_prot()); no error display with hl_regex()
    +
    +  1.0 - 2 November 2007. First release
    + +
    +

    +4.4  Testing +

    (to top)
    +
    +  To test htmLawed using a form interface, a demo web-page is provided with the htmLawed distribution (htmLawed.php and htmLawedTest.php should be in the same directory on the web-server). A file with test-cases is also provided.
    + +
    +

    +4.5  Upgrade, & old versions +

    (to top)
    +
    +  Upgrading is as simple as replacing the previous version of htmLawed.php (assuming it was not modified for customized features). As htmLawed output is almost always used in static documents, upgrading should not affect old, finalized content.
    +
    Important  The following upgrades may affect the functionality of a specific htmLawed installation:
    +
    +  (1) From version 1.1-1.1.10 to 1.1.11 (or later), if a hook_tag function is in use: In version 1.1.11, elements in closing tags (and not just the opening tags) are also passed to the function. There are no attribute names/values to pass, so a hook_tag function receives only the element name. The hook_tag function therefore may have to be edited. See section 3.4.9.
    +
    +  Old versions of htmLawed may be available online. E.g., for version 1.0, check http://www.bioinformatics.org/phplabware/downloads/htmLawed1.zip, for 1.1.1, htmLawed111.zip, and for 1.1.10, htmLawed1110.zip.
    + +
    +

    +4.6  Comparison with HTMLPurifier +

    (to top)
    +
    +  The HTMLPurifier PHP library by Edward Yang is a very good HTML filtering script that uses object oriented PHP code. Compared to htmLawed, it (as of year 2010):
    +
    +  *  does not support PHP versions older than 5.0 (HTMLPurifier dropped PHP 4 support after version 2)
    +
    +  *  is 15-20 times bigger (scores of files totalling more than 750 kb)
    +
    +  *  consumes 10-15 times more RAM memory (just including the HTMLPurifier files without calling the filter requires a few MBs of memory)
    +
    +  *  is expectedly slower
    +
    +  *  does not allow admins to fully allow all valid HTML (because of incomplete HTML support, it always considers elements like script illegal)
    +
    +  *  lacks many of the extra features of htmLawed (like entity conversions and code compaction/beautification)
    +
    +  *  has poor documentation
    +
    +  However, HTMLPurifier has finer checks for character encodings and attribute values, and can log warnings and errors. Visit the HTMLPurifier website for updated information.
    + +
    +

    +4.7  Use through application plug-ins/modules +

    (to top)
    +
    +  Plug-ins/modules to implement htmLawed in applications such as Drupal and DokuWiki may have been developed. Please check the application websites and the forum on the htmLawed site.
    + +
    +

    +4.8  Use in non-PHP applications +

    (to top)
    +
    +  Non-PHP applications written in Python, Ruby, etc., may be able to use htmLawed through system calls to the PHP engine. Such code may have been documented on the internet. Also check the forum on the htmLawed site.
    + +
    +

    +4.9  Donate +

    (to top)
    +
    +  A donation in any currency and amount to appreciate or support this software can be sent by PayPal to this email address: drpatnaik at yahoo dot com.
    + +
    +

    +4.10  Acknowledgements +

    (to top)
    +
    +  Nicholas Alipaz, Bryan Blakey, Pádraic Brady, Dac Chartrand, Ulf Harnhammer, Gareth Heyes, Klaus Leithoff, Lukasz Pilorz, Shelley Powers, Harro Verton, Edward Yang, and many anonymous users.
    +
    +  Thank you!
    + +
    +
    +

    +5  Appendices +

    (to top)
    +

    +5.1  Characters discouraged in XHTML +

    (to top)
    +
    +  Characters represented by the following hexadecimal code-points are not invalid, even though some validators may issue messages stating otherwise.
    +
    7f to 84, 86 to 9f, fdd0 to fddf, 1fffe, 1ffff, 2fffe, 2ffff, 3fffe, 3ffff, 4fffe, 4ffff, 5fffe, 5ffff, 6fffe, 6ffff, 7fffe, 7ffff, 8fffe, 8ffff, 9fffe, 9ffff, afffe, affff, bfffe, bffff, cfffe, cffff, dfffe, dffff, efffe, effff, ffffe, fffff, 10fffe and 10ffff
    + +
    +

    +5.2  Valid attribute-element combinations +

    (to top)
    +
    +  Valid attribute-element combinations as per W3C specs.
    +
    +  *  includes deprecated attributes (marked ^), attributes for the non-standard embed element (marked *), and the proprietary bordercolor (marked ~)
    +  *  only non-frameset, HTML body elements
    +  *  name for a and map, and lang are invalid in XHTML 1.1
    +  *  target is valid for a in XHTML 1.1 and higher
    +  *  xml:space is only for XHTML 1.1
    +
    +  abbr - td, th
    +  accept - form, input
    +  accept-charset - form
    +  accesskey - a, area, button, input, label, legend, textarea
    +  action - form
    +  align - caption^, embed, applet, iframe, img^, input^, object^, legend^, table^, hr^, div^, h1^, h2^, h3^, h4^, h5^, h6^, p^, col, colgroup, tbody, td, tfoot, th, thead, tr
    +  alt - applet, area, img, input
    +  archive - applet, object
    +  axis - td, th
    +  bgcolor - embed, table^, tr^, td^, th^
    +  border - table, img^, object^
    +  bordercolor~ - table, td, tr
    +  cellpadding - table
    +  cellspacing - table
    +  char - col, colgroup, tbody, td, tfoot, th, thead, tr
    +  charoff - col, colgroup, tbody, td, tfoot, th, thead, tr
    +  charset - a, script
    +  checked - input
    +  cite - blockquote, q, del, ins
    +  classid - object
    +  clear - br^
    +  code - applet
    +  codebase - object, applet
    +  codetype - object
    +  color - font
    +  cols - textarea
    +  colspan - td, th
    +  compact - dir, dl^, menu, ol^, ul^
    +  coords - area, a
    +  data - object
    +  datetime - del, ins
    +  declare - object
    +  defer - script
    +  dir - bdo
    +  disabled - button, input, optgroup, option, select, textarea
    +  enctype - form
    +  face - font
    +  flashvars* - embed
    +  for - label
    +  frame - table
    +  frameborder - iframe
    +  headers - td, th
    +  height - embed, iframe, td^, th^, img, object, applet
    +  href - a, area
    +  hreflang - a
    +  hspace - applet, img^, object^
    +  ismap - img, input
    +  label - option, optgroup
    +  language - script^
    +  longdesc - img, iframe
    +  marginheight - iframe
    +  marginwidth - iframe
    +  maxlength - input
    +  method - form
    +  model* - embed
    +  multiple - select
    +  name - button, embed, textarea, applet^, select, form^, iframe^, img^, a^, input, object, map^, param
    +  nohref - area
    +  noshade - hr^
    +  nowrap - td^, th^
    +  object - applet
    +  onblur - a, area, button, input, label, select, textarea
    +  onchange - input, select, textarea
    +  onfocus - a, area, button, input, label, select, textarea
    +  onreset - form
    +  onselect - input, textarea
    +  onsubmit - form
    +  pluginspage* - embed
    +  pluginurl* - embed
    +  prompt - isindex
    +  readonly - textarea, input
    +  rel - a
    +  rev - a
    +  rows - textarea
    +  rowspan - td, th
    +  rules - table
    +  scope - td, th
    +  scrolling - iframe
    +  selected - option
    +  shape - area, a
    +  size - hr^, font, input, select
    +  span - col, colgroup
    +  src - embed, script, input, iframe, img
    +  standby - object
    +  start - ol^
    +  summary - table
    +  tabindex - a, area, button, input, object, select, textarea
    +  target - a^, area, form
    +  type - a, embed, object, param, script, input, li^, ol^, ul^, button
    +  usemap - img, input, object
    +  valign - col, colgroup, tbody, td, tfoot, th, thead, tr
    +  value - input, option, param, button, li^
    +  valuetype - param
    +  vspace - applet, img^, object^
    +  width - embed, hr^, iframe, img, object, table, td^, th^, applet, col, colgroup, pre^
    +  wmode - embed
    +  xml:space - pre, script, style
    +
    +  These are allowed in all but the shown elements:
    +
    +  class - param, script
    +  dir - applet, bdo, br, iframe, param, script
    +  id - script
    +  lang - applet, br, iframe, param, script
    +  onclick - applet, bdo, br, font, iframe, isindex, param, script
    +  ondblclick - applet, bdo, br, font, iframe, isindex, param, script
    +  onkeydown - applet, bdo, br, font, iframe, isindex, param, script
    +  onkeypress - applet, bdo, br, font, iframe, isindex, param, script
    +  onkeyup - applet, bdo, br, font, iframe, isindex, param, script
    +  onmousedown - applet, bdo, br, font, iframe, isindex, param, script
    +  onmousemove - applet, bdo, br, font, iframe, isindex, param, script
    +  onmouseout - applet, bdo, br, font, iframe, isindex, param, script
    +  onmouseover - applet, bdo, br, font, iframe, isindex, param, script
    +  onmouseup - applet, bdo, br, font, iframe, isindex, param, script
    +  style - param, script
    +  title - param, script
    +  xml:lang - applet, br, iframe, param, script
    + +
    +

    +5.3  CSS 2.1 properties accepting URLs +

    (to top)
    +
    +  background
    +  background-image
    +  content
    +  cue-after
    +  cue-before
    +  cursor
    +  list-style
    +  list-style-image
    +  play-during
    + +
    +

    +5.4  Microsoft Windows 1252 character replacements +

    (to top)
    +
    +  Key: d double, l left, q quote, r right, s. single
    +
    +  Code-point (decimal) - hexadecimal value - replacement entity - represented character
    +
    +  127 - 7f - (removed) - (not used)
    +  128 - 80 - &#8364; - euro
    +  129 - 81 - (removed) - (not used)
    +  130 - 82 - &#8218; - baseline s. q
    +  131 - 83 - &#402; - florin
    +  132 - 84 - &#8222; - baseline d q
    +  133 - 85 - &#8230; - ellipsis
    +  134 - 86 - &#8224; - dagger
    +  135 - 87 - &#8225; - d dagger
    +  136 - 88 - &#710; - circumflex accent
    +  137 - 89 - &#8240; - permile
    +  138 - 8a - &#352; - S Hacek
    +  139 - 8b - &#8249; - l s. guillemet
    +  140 - 8c - &#338; - OE ligature
    +  141 - 8d - (removed) - (not used)
    +  142 - 8e - &#381; - Z Hacek
    +  143 - 8f - (removed) - (not used)
    +  144 - 90 - (removed) - (not used)
    +  145 - 91 - &#8216; - l s. q
    +  146 - 92 - &#8217; - r s. q
    +  147 - 93 - &#8220; - l d q
    +  148 - 94 - &#8221; - r d q
    +  149 - 95 - &#8226; - bullet
    +  150 - 96 - &#8211; - en dash
    +  151 - 97 - &#8212; - em dash
    +  152 - 98 - &#732; - tilde accent
    +  153 - 99 - &#8482; - trademark
    +  154 - 9a - &#353; - s Hacek
    +  155 - 9b - &#8250; - r s. guillemet
    +  156 - 9c - &#339; - oe ligature
    +  157 - 9d - (removed) - (not used)
    +  158 - 9e - &#382; - z Hacek
    +  159 - 9f - &#376; - Y dieresis
    + +
    +

    +5.5  URL format +

    (to top)
    +
    +  An absolute URL has a protocol or scheme, a network location or hostname, and optional path, parameters, query and fragment segments. Thus, an absolute URL has this generic structure:
    +
    + +    (scheme) : (//network location) /(path) ;(parameters) ?(query) #(fragment) +
    +
    +  The schemes can only contain letters, digits, +, . and -. Hostname is the portion after the // and up to the first / (if any; else, up to the end) when : is followed by a // (e.g., abc.com in ftp://abc.com/def); otherwise, it consists of everything after the : (e.g., def@abc.com in mailto:def@abc.com).
    +
    Relative URLs do not have explicit schemes and network locations; such values are inherited from a base URL.
    + +
    +

    +5.6  Brief on htmLawed code +

    (to top)
    +
    +  Much of the code's logic and reasoning can be understood from the documentation above.
    +
    +  The output of htmLawed is a text string containing the processed input. There is no custom error tracking.
    +
    Function arguments for htmLawed are:
    +
    +  *  $in - first argument; a text string; the input text to be processed. Any extraneous slashes added by PHP when magic quotes are enabled should be removed beforehand using PHP's stripslashes() function.
    +
    +  *  $config - second argument; an associative array; optional; named $C within htmLawed code. The array has keys with names like balance and keep_bad, and the values, which can be boolean, string, or array, depending on the key, are read to accordingly set the configurable parameters (indicated by the keys). All configurable parameters receive some default value if the value to be used is not specified by the user through $config. Finalized $config is thus a filtered and possibly larger array.
    +
    +  *  $spec - third argument; a text string; optional. The string has rules, written in an htmLawed-designated format, specifying element-specific attribute and attribute value restrictions. Function hl_spec() is used to convert the string to an associative-array, named $S within htmLawed code, for internal use. Finalized $spec is thus an array.
    +
    Finalized $config and $spec are made global variables while htmLawed is at work. Values of any pre-existing global variables with same names are noted, and their values are restored after htmLawed finishes processing the input (to capture the finalized values, the show_setting parameter of $config should be used). Depending on $config, another global variable hl_Ids, to track id attribute values for uniqueness, may be set. Unlike the other two variables, this one is not reset (or unset) post-processing.
    +
    +  Except for the main function htmLawed() and the functions kses() and kses_hook(), htmLawed's functions are name-spaced using the hl_ prefix. The functions and their roles are:
    +
    +  *  hl_attrval - checking attribute values against $spec
    +  *  hl_bal - tag balancing
    +  *  hl_cmtcd - handling CDATA sections and HTML comments
    +  *  hl_ent - entity handling
    +  *  hl_prot - checking a URL scheme/protocol
    +  *  hl_regex - checking syntax of a regular expression
    +  *  hl_spec - converting user-supplied $spec value to one used by htmLawed internally
    +  *  hl_tag - handling tags
    +  *  hl_tag2 - transforming tags
    +  *  hl_tidy - compact/beautify HTML
    +  *  hl_version - reporting htmLawed version
    +  *  htmLawed - main function
    +  *  kses - main function of kses
    +  *  kses_hook - hook function of kses
    +
    +  The last two are for compatibility with pre-existing code using the kses script. htmLawed's kses() basically passes on the filtering task to the htmLawed() function after deciphering $config and $spec from the argument values supplied to it. kses_hook() is an empty function and is meant to be filled with custom code if users of the kses script were relying on a hook function.
    +
    htmLawed() finalizes $spec (with the help of hl_spec()) and $config, and globalizes them. Finalization of $config involves setting default values if an inappropriate or invalid one is supplied. This includes calling hl_regex() to check well-formedness of regular expression patterns if such expressions are user-supplied through $config. htmLawed() then removes invalid characters like nulls and x01 and appropriately handles entities using hl_ent(). HTML comments and CDATA sections are identified and treated as per $config with the help of hl_cmtcd(). When retained, the < and > characters identifying them, and the <, > and & characters inside them, are replaced with control characters (code-points 1 to 5) till any tag balancing is completed.
    +
    +  After this initial processing htmLawed() identifies tags using regex and processes them with the help of hl_tag() --  a large function that analyzes tag content, filtering it as per HTML standards, $config and $spec. Among other things, hl_tag() transforms deprecated elements using hl_tag2(), removes attributes from closing tags, checks attribute values as per $spec rules using hl_attrval(), and checks URL protocols using hl_prot(). htmLawed() performs tag balancing and nesting checks with a call to hl_bal(), and optionally compacts/beautifies the output with proper white-spacing with a call to hl_tidy(). The latter temporarily replaces white-space, and <, > and & characters inside pre, script and textarea elements, and HTML comments and CDATA sections with control characters (code-points 1 to 5, and 7).
    +
    +  htmLawed permits the use of custom code or hook functions at two stages. The first, called inside htmLawed(), allows the input text as well as the finalized $config and $spec values to be altered right after the initial processing (see section 3.7). The second is called by hl_tag() once the tag content is finalized (see section 3.4.9).
    +
    +  The functionality of htmLawed is dictated by the external HTML standard. It is thus coded for a clear-cut objective with not much concern for tweakability. The code is only minimally annotated with comments -- it is not meant to instruct; PHP developers familiar with the HTML specifications will see the logic, and others can always refer to the htmLawed documentation. The compact structuring of the statements is meant to aid a quick grasp of the logic. +
    +
    +
    +


    HTM version of htmLawed_README.txt generated on 29 Aug, 2013 using rTxt2htm from PHP Labware +
    +
    + \ No newline at end of file diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed_README.txt b/mod/htmlawed/vendors/htmLawed/htmLawed_README.txt old mode 100755 new mode 100644 index e4027e465..5e19605e5 --- a/mod/htmlawed/vendors/htmLawed/htmLawed_README.txt +++ b/mod/htmlawed/vendors/htmLawed/htmLawed_README.txt @@ -1,1701 +1,1734 @@ -/* -htmLawed_README.txt, 8 June 2012 -htmLawed 1.1.11, 5 June 2012 -Copyright Santosh Patnaik -Dual licensed with LGPL 3 and GPL 2 or later -A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed -*/ - - -== Content ========================================================== - - -1 About htmLawed - 1.1 Example uses - 1.2 Features - 1.3 History - 1.4 License & copyright - 1.5 Terms used here -2 Usage - 2.1 Simple - 2.2 Configuring htmLawed using the '$config' parameter - 2.3 Extra HTML specifications using the '$spec' parameter - 2.4 Performance time & memory usage - 2.5 Some security risks to keep in mind - 2.6 Use without modifying old 'kses()' code - 2.7 Tolerance for ill-written HTML - 2.8 Limitations & work-arounds - 2.9 Examples of usage -3 Details - 3.1 Invalid/dangerous characters - 3.2 Character references/entities - 3.3 HTML elements - 3.3.1 HTML comments and 'CDATA' sections - 3.3.2 Tag-transformation for better XHTML-Strict - 3.3.3 Tag balancing and proper nesting - 3.3.4 Elements requiring child elements - 3.3.5 Beautify or compact HTML - 3.4 Attributes - 3.4.1 Auto-addition of XHTML-required attributes - 3.4.2 Duplicate/invalid 'id' values - 3.4.3 URL schemes (protocols) and scripts in attribute values - 3.4.4 Absolute & relative URLs - 3.4.5 Lower-cased, standard attribute values - 3.4.6 Transformation of deprecated attributes - 3.4.7 Anti-spam & 'href' - 3.4.8 Inline style properties - 3.4.9 Hook function for tag content - 3.5 Simple configuration directive for most valid XHTML - 3.6 Simple configuration directive for most `safe` HTML - 3.7 Using a hook function - 3.8 
Obtaining `finalized` parameter values - 3.9 Retaining non-HTML tags in input with mixed markup -4 Other - 4.1 Support - 4.2 Known issues - 4.3 Change-log - 4.4 Testing - 4.5 Upgrade, & old versions - 4.6 Comparison with 'HTMLPurifier' - 4.7 Use through application plug-ins/modules - 4.8 Use in non-PHP applications - 4.9 Donate - 4.10 Acknowledgements -5 Appendices - 5.1 Characters discouraged in HTML - 5.2 Valid attribute-element combinations - 5.3 CSS 2.1 properties accepting URLs - 5.4 Microsoft Windows 1252 character replacements - 5.5 URL format - 5.6 Brief on htmLawed code - - -== 1 About htmLawed ================================================ - - - htmLawed is a highly customizable single-file PHP script to make text secure, and standard- and admin policy-compliant for use in the body of HTML 4, XHTML 1 or 1.1, or generic XML documents. It is thus a configurable input (X)HTML filter, processor, purifier, sanitizer, beautifier, etc., and an alternative to the HTMLTidy:- http://tidy.sourceforge.net application. - - The `lawing in` of input text is needed to ensure that HTML code in the text is standard-compliant, does not introduce security vulnerabilities, and does not break the aesthetics, design or layout of web-pages. htmLawed tries to do this by, for example, making HTML well-formed with balanced and properly nested tags, neutralizing code that may be used for cross-site scripting ('XSS') attacks, and allowing only specified HTML elements/tags and attributes. 
- - --- 1.1 Example uses ------------------------------------------------ - - - * Filtering of text submitted as comments on blogs to allow only certain HTML elements - - * Making RSS/Atom newsfeed item-content standard-compliant: often one uses an excerpt from an HTML document for the content, and with unbalanced tags, non-numerical entities, etc., such excerpts may not be XML-compliant - - * Text processing for stricter XML standard-compliance: e.g., to have lowercased 'x' in hexadecimal numeric entities becomes necessary if an XHTML document with MathML content needs to be served as 'application/xml' - - * Scraping text or data from web-pages - - * Pretty-printing HTML code - - --- 1.2 Features ---------------------------------------------------o - - - Key: '*' security feature, '^' standard compliance, '~' requires setting right options, '`' different from 'Kses' - - * make input more *secure* and *standard-compliant* - * use for HTML 4, XHTML 1.0 or 1.1, or even generic *XML* documents ^~` - - * *beautify* or *compact* HTML ^~` - - * *restrict elements* ^~` - * proper closure of empty elements like 'img' ^` - * *transform deprecated elements* like 'u' ^~` - * HTML *comments* and 'CDATA' sections can be permitted ^~` - * elements like 'script', 'object' and 'form' can be permitted ~ - - * *restrict attributes*, including *element-specifically* ^~` - * remove *invalid attributes* ^` - * element and attribute names are *lower-cased* ^ - * provide *required attributes*, like 'alt' for 'image' ^` - * *transform deprecated attributes* ^~` - * attributes *declared only once* ^` - - * *restrict attribute values*, including *element-specifically* ^~` - * a value is declared for `empty` (`minimized`) attributes like 'checked' ^ - * check for potentially dangerous attribute values *~ - * ensure *unique* 'id' attribute values ^~` - * *double-quote* attribute values ^ - * lower-case *standard attribute values* like 'password' ^` - - * *attribute-specific URL 
protocol/scheme restriction* *~` - * disable *dynamic expressions* in 'style' values *~` - - * neutralize invalid named character entities ^` - * *convert* hexadecimal numeric entities to decimal ones, or vice versa ^~` - * convert named entities to numeric ones for generic XML use ^~` - - * remove *null* characters * - * neutralize potentially dangerous proprietary Netscape *Javascript entities* * - * replace potentially dangerous *soft-hyphen* character in URL-accepting attribute values with spaces * - - * remove common *invalid characters* not allowed in HTML or XML ^` - * replace *characters from Microsoft applications* like 'Word' that are discouraged in HTML or XML ^~` - * neutralize entities for characters invalid or discouraged in HTML or XML ^` - * appropriately neutralize '<', '&', '"', and '>' characters ^*` - - * understands improperly spaced tag content (like, spread over more than a line) and properly spaces them ` - * attempts to *balance tags* for well-formedness ^~` - * understands when *omitable closing tags* like '

    ' (allowed in HTML 4, transitional, e.g.) are missing ^~` - * attempts to permit only *validly nested tags* ^~` - * option to *remove or neutralize bad content* ^~` - * attempts to *rectify common errors of plain-text misplacement* (e.g., directly inside 'blockquote') ^~` - - * fast, *non-OOP* code of ~45 kb incurring peak basal memory usage of ~0.5 MB - * *compatible* with pre-existing code using 'Kses' (the filter used by 'WordPress') - - * optional *anti-spam* measures such as addition of 'rel="nofollow"' and link-disabling ~` - * optionally makes *relative URLs absolute*, and vice versa ~` - - * optionally mark '&' to identify the entities for '&', '<' and '>' introduced by htmLawed ~` - - * allows deployment of powerful *hook functions* to *inject* HTML, *consolidate* 'style' attributes to 'class', finely check attribute values, etc. ~` - - * *independent of character encoding* of input and does not affect it - - * *tolerance for ill-written HTML* to a certain degree - - --- 1.3 History ----------------------------------------------------o - - - htmLawed was developed for use with 'LabWiki', a wiki software developed at PHP Labware, as a suitable software could not be found. Existing PHP software like 'Kses' and 'HTMLPurifier' were deemed inadequate, slow, resource-intensive, or dependent on external applications like 'HTML Tidy'. - - htmLawed started as a modification of Ulf Harnhammar's 'Kses' (version 0.2.2) software, and is compatible with code that uses 'Kses'; see section:- #2.6. - - --- 1.4 License & copyright ----------------------------------------o - - - htmLawed is free and open-source software dual licensed under LGPL license version 3:- http://www.gnu.org/licenses/lgpl-3.0.txt and GPL license version 2:- http://www.gnu.org/licenses/gpl-2.0.txt or later, and copyrighted by Santosh Patnaik, MD, PhD. 
- - --- 1.5 Terms used here --------------------------------------------o - - - * `administrator` - or admin; person setting up the code to pass input through htmLawed; also, `user` - * `attributes` - name-value pairs like 'href="http://x.com"' in opening tags - * `author` - `writer` - * `character` - atomic unit of text; internally represented by a numeric `code-point` as specified by the `encoding` or `charset` in use - * `entity` - markup like '>' and ' ' used to refer to a character - * `element` - HTML element like 'a' and 'img' - * `element content` - content between the opening and closing tags of an element, like 'click' of 'click' - * `HTML` - implies XHTML unless specified otherwise - * `input` - text string given to htmLawed to process - * `processing` - involves filtering, correction, etc., of input - * `safe` - absence or reduction of certain characters and HTML elements and attributes in the input that can otherwise potentially and circumstantially expose web-site users to security vulnerabilities like cross-site scripting attacks (XSS) - * `scheme` - URL protocol like 'http' and 'ftp' - * `specs` - standard specifications - * `style property` - terms like 'border' and 'height' for which declarations are made in values for the 'style' attribute of elements - * `tag` - markers like '' and '' delineating element content; the opening tag can contain attributes - * `tag content` - consists of tag markers '<' and '>', element names like 'div', and possibly attributes - * `user` - administrator - * `writer` - end-user like a blog commenter providing the input that is to be processed; also, `author` - - -== 2 Usage ========================================================oo - - - htmLawed should work with PHP 4.4 and higher. Either 'include()' the 'htmLawed.php' file or copy-paste the entire code. 
- - To easily *test* htmLawed using a form-based interface, use the provided demo:- htmLawedTest.php ('htmLawed.php' and 'htmLawedTest.php' should be in the same directory on the web-server). - - *Note*: For code for usage of the htmLawed class (for htmLawed in OOP), please refer to the htmLawed:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed website; the filtering itself can be configured, etc., as described here. - - --- 2.1 Simple ------------------------------------------------------ - - - The input text to be processed, '$text', is passed as an argument of type string; 'htmLawed()' returns the processed string: - - $processed = htmLawed($text); - - *Note*: If input is from a '$_GET' or '$_POST' value, and 'magic quotes' are enabled on the PHP setup, run 'stripslashes()' on the input before passing to htmLawed. - - By default, htmLawed will process the text allowing all valid HTML elements/tags, secure URL scheme/CSS style properties, etc. It will allow 'CDATA' sections and HTML comments, balance tags, and ensure proper nesting of elements. Such actions can be configured using two other optional arguments -- '$config' and '$spec': - - $processed = htmLawed($text, $config, $spec); - - These extra parameters are detailed below. Some examples are shown in section:- #2.9. - - *Note*: For maximum protection against 'XSS' and other scripting attacks (e.g., by disallowing Javascript code), consider using the 'safe' parameter; see section:- #3.6. - - --- 2.2 Configuring htmLawed using the '$config' parameter ---------o - - - '$config' instructs htmLawed on how to tackle certain tasks. When '$config' is not specified, or not set as an array (e.g., '$config = 1'), htmLawed will take default actions. One or many of the task-action or value-specification pairs can be specified in '$config' as array key-value pairs. If a parameter is not specified, htmLawed will use the default value/action indicated further below. 
- - $config = array('comment'=>0, 'cdata'=>1); - $processed = htmLawed($text, $config); - - Or, - - $processed = htmLawed($text, array('comment'=>0, 'cdata'=>1)); - - Below are the possible value-specification combinations. In PHP code, values that are integers should not be quoted and should be used as numeric types (unless meant as string/text). - - Key: '*' default, '^' different default when htmLawed is used in the Kses-compatible mode (see section:- #2.6), '~' different default when 'valid_xhtml' is set to '1' (see section:- #3.5), '"' different default when 'safe' is set to '1' (see section:- #3.6) - - *abs_url* - Make URLs absolute or relative; '$config["base_url"]' needs to be set; see section:- #3.4.4 - - '-1' - make relative - '0' - no action * - '1' - make absolute - - *and_mark* - Mark '&' characters in the original input; see section:- #3.2 - - *anti_link_spam* - Anti-link-spam measure; see section:- #3.4.7 - - '0' - no measure taken * - 'array("regex1", "regex2")' - will ensure a 'rel' attribute with 'nofollow' in its value in case the 'href' attribute value matches the regular expression pattern 'regex1', and/or will remove 'href' if its value matches the regular expression pattern 'regex2'. E.g., 'array("/./", "/://\W*(?!(abc\.com|xyz\.org))/")'; see section:- #3.4.7 for more. 
- - *anti_mail_spam* - Anti-mail-spam measure; see section:- #3.4.7 - - '0' - no measure taken * - 'word' - '@' in mail address in 'href' attribute value is replaced with specified 'word' - - *balance* - Balance tags for well-formedness and proper nesting; see section:- #3.3.3 - - '0' - no - '1' - yes * - - *base_url* - Base URL value that needs to be set if '$config["abs_url"]' is not '0'; see section:- #3.4.4 - - *cdata* - Handling of 'CDATA' sections; see section:- #3.3.1 - - '0' - don't consider 'CDATA' sections as markup and proceed as if plain text ^" - '1' - remove - '2' - allow, but neutralize any '<', '>', and '&' inside by converting them to named entities - '3' - allow * - - *clean_ms_char* - Replace discouraged characters introduced by Microsoft Word, etc.; see section:- #3.1 - - '0' - no * - '1' - yes - '2' - yes, but replace special single & double quotes with ordinary ones - - *comment* - Handling of HTML comments; see section:- #3.3.1 - - '0' - don't consider comments as markup and proceed as if plain text ^" - '1' - remove - '2' - allow, but neutralize any '<', '>', and '&' inside by converting to named entities - '3' - allow * - - *css_expression* - Allow dynamic CSS expression by not removing the expression from CSS property values in 'style' attributes; see section:- #3.4.8 - - '0' - remove * - '1' - allow - - *deny_attribute* - Denied HTML attributes; see section:- #3.4 - - '0' - none * - 'string' - dictated by values in 'string' - 'on*' (like 'onfocus') attributes not allowed - " - - *direct_nest_list* - Allow direct nesting of a list within another without requiring it to be a list item; see section:- #3.3.4 - - '0' - no * - '1' - yes - - *elements* - Allowed HTML elements; see section:- #3.3 - - '* -center -dir -font -isindex -menu -s -strike -u' - ~ - 'applet, embed, iframe, object, script' not allowed - " - - *hexdec_entity* - Allow hexadecimal numeric entities and do not convert to the more widely accepted decimal ones, or convert decimal 
to hexadecimal ones; see section:- #3.2 - - '0' - no - '1' - yes * - '2' - convert decimal to hexadecimal ones - - *hook* - Name of an optional hook function to alter the input string, '$config' or '$spec' before htmLawed starts its main work; see section:- #3.7 - - '0' - no hook function * - 'name' - 'name' is name of the hook function ('kses_hook' ^) - - *hook_tag* - Name of an optional hook function to alter tag content finalized by htmLawed; see section:- #3.4.9 - - '0' - no hook function * - 'name' - 'name' is name of the hook function - - *keep_bad* - Neutralize bad tags by converting '<' and '>' to entities, or remove them; see section:- #3.3.3 - - '0' - remove ^ - '1' - neutralize both tags and element content - '2' - remove tags but neutralize element content - '3' and '4' - like '1' and '2' but remove if text ('pcdata') is invalid in parent element - '5' and '6' * - like '3' and '4' but line-breaks, tabs and spaces are left - - *lc_std_val* - For XHTML compliance, predefined, standard attribute values, like 'get' for the 'method' attribute of 'form', must be lowercased; see section:- #3.4.5 - - '0' - no - '1' - yes * - - *make_tag_strict* - Transform/remove these non-strict XHTML elements, even if they are allowed by the admin: 'applet' 'center' 'dir' 'embed' 'font' 'isindex' 'menu' 's' 'strike' 'u'; see section:- #3.3.2 - - '0' - no ^ - '1' - yes, but leave 'applet', 'embed' and 'isindex' elements that currently can't be transformed * - '2' - yes, removing 'applet', 'embed' and 'isindex' elements and their contents (nested elements remain) ~ - - *named_entity* - Allow non-universal named HTML entities, or convert to numeric ones; see section:- #3.2 - - '0' - convert - '1' - allow * - - *no_deprecated_attr* - Allow deprecated attributes or transform them; see section:- #3.4.6 - - '0' - allow ^ - '1' - transform, but 'name' attributes for 'a' and 'map' are retained * - '2' - transform - - *parent* - Name of the parent element, possibly imagined, that will 
hold the input; see section:- #3.3 - - *safe* - Magic parameter to make input the most secure against XSS without needing to specify other relevant '$config' parameters; see section:- #3.6 - - '0' - no * - '1' - will auto-adjust other relevant '$config' parameters (indicated by '"' in this list) - - *schemes* - Array of attribute-specific, comma-separated, lower-cased list of schemes (protocols) allowed in attributes accepting URLs (or '!' to `deny` any URL); '*' covers all unspecified attributes; see section:- #3.4.3 - - 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https' * - '*: ftp, gopher, http, https, mailto, news, nntp, telnet' ^ - 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; style: !; *:file, http, https' " - - *show_setting* - Name of a PHP variable to assign the `finalized` '$config' and '$spec' values; see section:- #3.8 - - *style_pass* - Do not look at 'style' attribute values, letting them through without any alteration - - '0' - no * - '1' - htmLawed will let through any 'style' value; see section:- #3.4.8 - - *tidy* - Beautify or compact HTML code; see section:- #3.3.5 - - '-1' - compact - '0' - no * - '1' or 'string' - beautify (custom format specified by 'string') - - *unique_ids* - 'id' attribute value checks; see section:- #3.4.2 - - '0' - no ^ - '1' - remove duplicate and/or invalid ones * - 'word' - remove invalid ones and replace duplicate ones with new and unique ones based on the 'word'; the admin-specified 'word', like 'my_', should begin with a letter (a-z) and can contain letters, digits, '.', '_', '-', and ':'. 
- - *valid_xhtml* - Magic parameter to make input the most valid XHTML without needing to specify other relevant '$config' parameters; see section:- #3.5 - - '0' - no * - '1' - will auto-adjust other relevant '$config' parameters (indicated by '~' in this list) - - *xml:lang* - Auto-adding 'xml:lang' attribute; see section:- #3.4.1 - - '0' - no * - '1' - add if 'lang' attribute is present - '2' - add if 'lang' attribute is present, and remove 'lang' ~ - - --- 2.3 Extra HTML specifications using the $spec parameter --------o - - - The '$spec' argument can be used to disallow an otherwise legal attribute for an element, or to restrict the attribute's values. This can also be helpful as a security measure (e.g., in certain versions of browsers, certain values can cause buffer overflows and denial of service attacks), or in enforcing admin policy compliance. '$spec' is specified as a string of text containing one or more `rules`, with multiple rules separated from each other by a semi-colon (';'). E.g., - - $spec = 'i=-*; td, tr=style, id, -*; a=id(match="/[a-z][a-z\d.:\-`"]*/i"/minval=2), href(maxlen=100/minlen=34); img=-width,-alt'; - $processed = htmLawed($text, $config, $spec); - - Or, - - $processed = htmLawed($text, $config, 'i=-*; td, tr=style, id, -*; a=id(match="/[a-z][a-z\d.:\-`"]*/i"/minval=2), href(maxlen=100/minlen=34); img=-width,-alt'); - - A rule begins with an HTML *element* name(s) (`rule-element`), for which the rule applies, followed by an equal ('=') sign. A rule-element may represent multiple elements if comma (,)-separated element names are used. E.g., 'th,td,tr='. - - Rest of the rule consists of comma-separated HTML *attribute names*. A minus ('-') character before an attribute means that the attribute is not permitted inside the rule-element. E.g., '-width'. To deny all attributes, '-*' can be used. 
- - Following shows examples of rule excerpts with rule-element 'a' and the attributes that are being permitted: - - * 'a=' - all - * 'a=id' - all - * 'a=href, title, -id, -onclick' - all except 'id' and 'onclick' - * 'a=*, id, -id' - all except 'id' - * 'a=-*' - none - * 'a=-*, href, title' - none except 'href' and 'title' - * 'a=-*, -id, href, title' - none except 'href' and 'title' - - Rules regarding *attribute values* are optionally specified inside round brackets after attribute names in slash ('/')-separated `parameter = value` pairs. E.g., 'title(maxlen=30/minlen=5)'. None, or one or more of the following parameters may be specified: - - * 'oneof' - one or more choices separated by '|' that the value should match; if only one choice is provided, then the value must match that choice - - * 'noneof' - one or more choices separated by '|' that the value should not match - - * 'maxlen' and 'minlen' - upper and lower limits for the number of characters in the attribute value; specified in numbers - - * 'maxval' and 'minval' - upper and lower limits for the numerical value specified in the attribute value; specified in numbers - - * 'match' and 'nomatch' - pattern that the attribute value should or should not match; specified as PHP/PCRE-compatible regular expressions with delimiters and possibly modifiers - - * 'default' - a value to force on the attribute if the value provided by the writer does not fit any of the specified parameters - - If 'default' is not set and the attribute value does not satisfy any of the specified parameters, then the attribute is removed. The 'default' value can also be used to force all attribute declarations to take the same value (by getting the values declared illegal by setting, e.g., 'maxlen' to '-1'). - - Examples with `input` '' are shown below. 
- - `Rule`: 'input=title(maxlen=60/minlen=6), value' - `Output`: '' - - `Rule`: 'input=title(), value(maxval=8/default=6)' - `Output`: '' - - `Rule`: 'input=title(nomatch=%w.d%i), value(match=%em%/default=6em)' - `Output`: '' - - `Rule`: 'input=title(oneof=height|depth/default=depth), value(noneof=5|6)' - `Output`: '' - - *Special characters*: The characters ';', ',', '/', '(', ')', '|', '~' and space have special meanings in the rules. Words in the rules that use such characters, or the characters themselves, should be `escaped` by enclosing in pairs of double-quotes ('"'). A back-tick ('`') can be used to escape a literal '"'. An example rule illustrating this is 'input=value(maxlen=30/match="/^\w/"/default="your `"ID`"")'. - - *Note*: To deny an attribute for all elements for which it is legal, '$config["deny_attribute"]' (see section:- #3.4) can be used instead of '$spec'. Also, attributes can be allowed element-specifically through '$spec' while being denied globally through '$config["deny_attribute"]'. The 'hook_tag' parameter (section:- #3.4.9) can also be used to implement the '$spec' functionality. - - --- 2.4 Performance time & memory usage ----------------------------o - - - The time and memory used by htmLawed depends on its configuration and the size of the input, and the amount, nestedness and well-formedness of the HTML markup within it. In particular, tag balancing and beautification each can increase the processing time by about a quarter. - - The htmLawed demo:- htmLawedTest.php can be used to evaluate the performance and effects of different types of input and '$config'. - - --- 2.5 Some security risks to keep in mind ------------------------o - - - When setting the parameters/arguments (like those to allow certain HTML elements) for use with htmLawed, one should bear in mind that the setting may let through potentially `dangerous` HTML code which is meant to steal user-data, deface a website, render a page non-functional, etc. 
- - Unless end-users, either people or software, supplying the content are completely trusted, security issues arising from the degree of HTML usage permission has to be kept in mind. For example, following increase security risks: - - * Allowing 'script', 'applet', 'embed', 'iframe' or 'object' elements, or certain of their attributes like 'allowscriptaccess' - - * Allowing HTML comments (some Internet Explorer versions are vulnerable with, e.g., '' - - * Allowing dynamic CSS expressions (a feature of the IE browser) - - * Allowing the 'style' attribute - - To remove `unsecure` HTML, code-developers using htmLawed must set '$config' appropriately. E.g., '$config["elements"] = "* -script"' to deny the 'script' element (section:- #3.3), '$config["safe"] = 1' to auto-configure ceratin htmLawed parameters for maximizing security (section:- #3.6), etc. - - Permitting the '*style*' attribute brings in risks of `click-jacking`, `phishing`, web-page overlays, etc., `even` when the 'safe' parameter is enabled (see section:- #3.6). Except for URLs and a few other things like CSS dynamic expressions, htmLawed currently does not check every CSS style property. It does provide ways for the code-developer implementing htmLawed to do such checks through htmLawed's '$spec' argument, and through the 'hook_tag' parameter (see section:- #3.4.8 for more). Disallowing 'style' completely and relying on CSS classes and stylesheet files is recommended. - - htmLawed does not check or correct the character *encoding* of the input it receives. In conjunction with permitting circumstances such as when the character encoding is left undefined through HTTP headers or HTML 'meta' tags, this can permit an exploit (like Google's UTF-7/XSS vulnerability of the past). - - --- 2.6 Use without modifying old 'kses()' code --------------------o - - - The 'Kses' PHP script is used by many applications (like 'WordPress'). 
It is possible to have such applications use htmLawed instead, since it is compatible with code that calls the 'kses()' function declared in the 'Kses' file (usually named 'kses.php'). E.g., application code like this will continue to work after replacing 'Kses' with htmLawed: - - $comment_filtered = kses($comment_input, array('a'=>array(), 'b'=>array(), 'i'=>array())); - - For some of the '$config' parameters, htmLawed will use values other than the default ones. These are indicated by '^' in section:- #2.2. To force htmLawed to use other values, function 'kses()' in the htmLawed code should be edited -- a few configurable parameters/variables need to be changed. - - If the application uses a 'Kses' file that has the 'kses()' function declared, then, to have the application use htmLawed instead of 'Kses', simply rename 'htmLawed.php' (to 'kses.php', e.g.) and replace the 'Kses' file (or just replace the code in the 'Kses' file with the htmLawed code). If the 'kses()' function in the 'Kses' file had been renamed by the application developer (e.g., in 'WordPress', it is named 'wp_kses()'), then appropriately rename the 'kses()' function in the htmLawed code. - - If the 'Kses' file used by the application has been highly altered by the application developers, then one may need a different approach. E.g., with 'WordPress', it is best to copy the htmLawed code to 'wp_includes/kses.php', rename the newly added function 'kses()' to 'wp_kses()', and delete the code for the original 'wp_kses()' function. - - If the 'Kses' code has a non-empty hook function (e.g., 'wp_kses_hook()' in case of 'WordPress'), then the code for htmLawed's 'kses_hook()' function should be appropriately edited. However, the requirement of the hook function should be re-evaluated considering that htmLawed has extra capabilities. With 'WordPress', the hook function is an essential one. 
The following code is suggested for the htmLawed 'kses_hook()' in case of 'WordPress': - - function kses_hook($string, &$cf, &$spec){ - // kses compatibility - $allowed_html = $spec; - $allowed_protocols = array(); - foreach($cf['schemes'] as $v){ - foreach($v as $k2=>$v2){ - if(!in_array($k2, $allowed_protocols)){ - $allowed_protocols[] = $k2; - } - } - } - return wp_kses_hook($string, $allowed_html, $allowed_protocols); - // eof - } - - --- 2.7 Tolerance for ill-written HTML -----------------------------o - - - htmLawed can work with ill-written HTML code in the input. However, HTML that is too ill-written may not be `read` as HTML, and be considered mere plain text instead. Following statements indicate the degree of `looseness` that htmLawed can work with, and can be provided in instructions to writers: - - * Tags must be flanked by '<' and '>' with no '>' inside -- any needed '>' should be put in as '>'. It is possible for tag content (element name and attributes) to be spread over many lines instead of being on one. A space may be present between the tag content and '>', like '
    ' and '', but not after the '<'. - - * Element and attribute names need not be lower-cased. - - * Attribute string of elements may be liberally spaced with tabs, line-breaks, etc. - - * Attribute values need not be double-quoted, or may be single-quoted. - - * Left-padding of numeric entities (like, '&#0160;', '&#x07ff;') with '0' is okay as long as the number of characters between the '&' and the ';' does not exceed 8. All entities must end with ';' though. - - * Named character entities must be properly cased. E.g., '&Lt;' or '&TILDE;' will not be let through without modification. - - * HTML comments should not be inside element tags (okay between tags), and should begin with '<!--' and end with '-->'. Characters like '<', '>', and '&' may be allowed inside depending on '$config', but any '-->' inside should be put in as '--&gt;'. Any '--' inside will be automatically converted to '-', and a space will be added before the comment delimiter '-->'. - - * 'CDATA' sections should not be inside element tags, and can be in element content only if plain text is allowed for that element. They should begin with '<![CDATA[' and end with ']]>'. Characters like '<', '>', and '&' may be allowed inside depending on '$config', but any ']]>' inside should be put in as ']]&gt;'. - - * For attribute values, character entities '&lt;', '&gt;' and '&amp;' should be used instead of characters '<' and '>', and '&' (when '&' is not part of a character entity). This applies even for Javascript code in values of attributes like 'onclick'. - - * Characters '<', '>', '&' and '"' that are part of actual Javascript, etc., code in 'script' elements should be used as such and not be put in as entities like '&gt;'. Otherwise, though the HTML will be valid, the code may fail to work. Further, if such characters have to be used, then they should be put inside 'CDATA' sections. 
- - * Simple instructions like "an opening tag cannot be present between two closing tags" and "nested elements should be closed in the reverse order of how they were opened" can help authors write balanced HTML. If tags are imbalanced, htmLawed will try to balance them, but in the process, depending on '$config["keep_bad"]', some code/text may be lost. - - * Input authors should be notified of admin-specified allowed elements, attributes, configuration values (like conversion of named entities to numeric ones), etc. - - * With '$config["unique_ids"]' not '0' and the 'id' attribute being permitted, writers should carefully avoid using duplicate or invalid 'id' values as even though htmLawed will correct/remove the values, the final output may not be the one desired. E.g., when '' is processed into -''. - - * Note that even if intended HTML is lost in a highly ill-written input, the processed output will be more secure and standard-compliant. - - * For URLs, unless '$config["scheme"]' is appropriately set, writers should avoid using escape characters or entities in schemes. E.g., 'http' (which many browsers will read as the harmless 'http') may be considered bad by htmLawed. - - * htmLawed will attempt to put plain text present directly inside 'blockquote', 'form', 'map' and 'noscript' elements (illegal as per the specs) inside auto-generated 'div' elements. - - --- 2.8 Limitations & work-arounds ---------------------------------o - - - htmLawed's main objective is to make the input text `more` standard-compliant, secure for web-page readers, and free of HTML elements and attributes considered undesirable by the administrator. Some of its current limitations, regardless of this objective, are noted below along with work-arounds. - - It should be borne in mind that no browser application is 100% standard-compliant, and that some of the standard specs (like asking for normalization of white-spacing within 'textarea' elements) are clearly wrong. 
Regarding security, note that `unsafe` HTML code is not necessarily legally invalid. - - * htmLawed is meant for input that goes into the 'body' of HTML documents. HTML's head-level elements are not supported, nor are the frameset elements 'frameset', 'frame' and 'noframes'. - - * It cannot transform the non-standard 'embed' elements to the standard-compliant 'object' elements. Yet, it can allow 'embed' elements if permitted ('embed' is widely used and supported). Admins can certainly use the 'hook_tag' parameter (section:- #3.4.9) to deploy a custom embed-to-object converter function. - - * The only non-standard element that may be permitted is 'embed'; others like 'noembed' and 'nobr' cannot be permitted without modifying the htmLawed code. - - * It cannot handle input that has non-HTML code like 'SVG' and 'MathML'. One way around is to break the input into pieces and pass only those without non-HTML code to htmLawed. Another is described in section:- #3.9. A third way may be to somehow take advantage of the '$config["and_mark"]' parameter (see section:- #3.2). - - * By default, htmLawed won't check many attribute values for standard compliance. E.g., 'width="20m"' with the dimension in non-standard 'm' is let through. Implementing universal and strict attribute value checks can make htmLawed slow and resource-intensive. Admins should look at the 'hook_tag' parameter (section:- #3.4.9) or '$spec' to enforce finer checks. - - * The attributes, deprecated (which can be transformed too) or not, that it supports are largely those that are in the specs. Only a few of the proprietary attributes are supported. - - * Except for contained URLs and dynamic expressions (also optional), htmLawed does not check CSS style property values. Admins should look at using the 'hook_tag' parameter (section:- #3.4.9) or '$spec' for finer checks. 
Perhaps the best option is to disallow 'style' but allow 'class' attributes with the right 'oneof' or 'match' values for 'class', and have the various class style properties in '.css' CSS stylesheet files. - - * htmLawed does not parse emoticons, decode `BBcode`, or `wikify`, auto-converting text to proper HTML. Similarly, it won't convert line-breaks to 'br' elements. Such functions are beyond its purview. Admins should use other code to pre- or post-process the input for such purposes. - - * htmLawed cannot be used to have links force-opened in new windows (by auto-adding appropriate 'target' and 'onclick' attributes to 'a'). Admins should look at Javascript-based DOM-modifying solutions for this. Admins may also be able to use a custom hook function to enforce such checks ('hook_tag' parameter; see section:- #3.4.9). - - * Nesting-based checks are not possible. E.g., one cannot disallow 'p' elements specifically inside 'td' while permitting them elsewhere. Admins may be able to use a custom hook function to enforce such checks ('hook_tag' parameter; see section:- #3.4.9). - - * Except for optionally converting absolute or relative URLs to the other type, htmLawed will not alter URLs (e.g., to change the value of query strings or to convert 'http' to 'https'). Having absolute URLs may be a standard-requirement, e.g., when HTML is embedded in email messages, whereas altering URLs for other purposes is beyond htmLawed's goals. Admins may be able to use a custom hook function to enforce such checks ('hook_tag' parameter; see section:- #3.4.9). - - * Pairs of opening and closing tags that do not enclose any content (like '') are not removed. This may be against the standard specs for certain elements (e.g., 'table'). However, presence of such standard-incompliant code will not break the display or layout of content. Admins can also use simple regex-based code to filter out such code. 
- - * htmLawed does not check for certain element orderings described in the standard specs (e.g., in a 'table', 'tbody' is allowed before 'tfoot'). Admins may be able to use a custom hook function to enforce such checks ('hook_tag' parameter; see section:- #3.4.9). - - * htmLawed does not check the number of nested elements. E.g., it will allow two 'caption' elements in a 'table' element, illegal as per the specs. Admins may be able to use a custom hook function to enforce such checks ('hook_tag' parameter; see section:- #3.4.9). - - * htmLawed might convert certain entities to actual characters and remove backslashes and CSS comment-markers ('/*') in 'style' attribute values in order to detect malicious HTML like crafted IE-specific dynamic expressions like 'expression...'. If this is too harsh, admins can allow CSS expressions through htmLawed core but then use a custom function through the 'hook_tag' parameter (section:- #3.4.9) to more specifically identify CSS expressions in the 'style' attribute values. Also, using '$config["style_pass"]', it is possible to have htmLawed pass 'style' attribute values without even looking at them (section:- #3.4.8). - - * htmLawed does not correct certain possible attribute-based security vulnerabilities (e.g., 'x'). These arise when browsers mis-identify markup in `escaped` text, defeating the very purpose of escaping text (a bad browser will read the given example as 'x'). - - * Because of poor Unicode support in PHP, htmLawed does not remove the `high value` HTML-invalid characters with multi-byte code-points. Such characters however are extremely unlikely to be in the input. (see section:- #3.1). - - * htmLawed does not check or correct the character encoding of the input it receives. In conjunction with permitting circumstances such as when the character encoding is left undefined through HTTP headers or HTML 'meta' tags, this can permit an exploit (like Google's UTF-7/XSS vulnerability of the past). 
- - * Like any script using PHP's PCRE regex functions, PHP setup-specific low PCRE limit values can cause htmLawed to at least partially fail with very long input texts. - - --- 2.9 Examples of usage -------------------------------------------o - - - Safest, allowing only `safe` HTML markup -- - - $config = array('safe'=>1); - $out = htmLawed($in, $config); - - Simplest, allowing all valid HTML markup except 'javascript:' -- - - $out = htmLawed($in); - - Allowing all valid HTML markup including 'javascript:' -- - - $config = array('schemes'=>'*:*'); - $out = htmLawed($in, $config); - - Allowing only 'safe' HTML and the elements 'a', 'em', and 'strong' -- - - $config = array('safe'=>1, 'elements'=>'a, em, strong'); - $out = htmLawed($in, $config); - - Not allowing elements 'script' and 'object' -- - - $config = array('elements'=>'* -script -object'); - $out = htmLawed($in, $config); - - Not allowing attributes 'id' and 'style' -- - - $config = array('deny_attribute'=>'id, style'); - $out = htmLawed($in, $config); - - Permitting only attributes 'title' and 'href' -- - - $config = array('deny_attribute'=>'* -title -href'); - $out = htmLawed($in, $config); - - Remove bad/disallowed tags altogether instead of converting them to entities -- - - $config = array('keep_bad'=>0); - $out = htmLawed($in, $config); - - Allowing attribute 'title' only in 'a' and not allowing attributes 'id', 'style', or scriptable `on*` attributes like 'onclick' -- - - $config = array('deny_attribute'=>'title, id, style, on*'); - $spec = 'a=title'; - $out = htmLawed($in, $config, $spec); - - Some case-studies are presented below. 
- - *1.* A blog administrator wants to allow only 'a', 'em', 'strike', 'strong' and 'u' in comments, but needs 'strike' and 'u' transformed to 'span' for better XHTML 1-strict compliance, and, he wants the 'a' links to be to 'http' or 'https' resources: - - $processed = htmLawed($in, array('elements'=>'a, em, strike, strong, u', 'make_tag_strict'=>1, 'safe'=>1, 'schemes'=>'*:http, https'), 'a=href'); - - *2.* An author uses a custom-made web application to load content on his web-site. He is the only one using that application and the content he generates has all types of HTML, including scripts. The web application uses htmLawed primarily as a tool to correct errors that creep in while writing HTML and to take care of the occasional `bad` characters in copy-paste text introduced by Microsoft Office. The web application provides a preview before submitted input is added to the content. For the previewing process, htmLawed is set up as follows: - - $processed = htmLawed($in, array('css_expression'=>1, 'keep_bad'=>1, 'make_tag_strict'=>1, 'schemes'=>'*:*', 'valid_xhtml'=>1)); - - For the final submission process, 'keep_bad' is set to '6'. A value of '1' for the preview process allows the author to note and correct any HTML mistake without losing any of the typed text. - - *3.* A data-miner is scraping information in a specific table of similar web-pages and is collating the data rows, and uses htmLawed to reduce unnecessary markup and white-spaces: - - $processed = htmLawed($in, array('elements'=>'tr, td', 'tidy'=>-1), 'tr, td ='); - - -== 3 Details =====================================================oo - - --- 3.1 Invalid/dangerous characters -------------------------------- - - - Valid characters (more correctly, their code-points) in HTML or XML are, hexadecimally, '9', 'a', 'd', '20' to 'd7ff', and 'e000' to '10ffff', except 'fffe' and 'ffff' (decimally, '9', '10', '13', '32' to '55295', and '57344' to '1114111', except '65534' and '65535'). 
htmLawed removes the invalid characters '0' to '8', 'b', 'c', and 'e' to '1f'. - - Because of PHP's poor native support for multi-byte characters, htmLawed cannot check for the remaining invalid code-points. However, for various reasons, it is very unlikely for any of those characters to be in the input. - - Characters that are discouraged (see section:- #5.1) but not invalid are not removed by htmLawed. - - It (function 'hl_tag()') also replaces the potentially dangerous (in some Mozilla [Firefox] and Opera browsers) soft-hyphen character (code-point, hexadecimally, 'ad', or decimally, '173') in attribute values with spaces. Where required, the characters '<', '>', '&', and '"' are converted to entities. - - With '$config["clean_ms_char"]' set as '1' or '2', many of the discouraged characters (decimal code-points '127' to '159' except '133') that many Microsoft applications incorrectly use (as per the 'Windows 1252' ['Cp-1252'] or a similar encoding system), and the character for decimal code-point '133', are converted to appropriate decimal numerical entities (or removed for a few cases)-- see appendix in section:- #5.4. This can help avoid some display issues arising from copying-pasting of content. - - With '$config["clean_ms_char"]' set as '2', characters for the hexadecimal code-points '82', '91', and '92' (for special single-quotes), and '84', '93', and '94' (for special double-quotes) are converted to ordinary single and double quotes respectively and not to entities. - - The character values are replaced with entities/characters and not character values referred to by the entities/characters to keep this task independent of the character-encoding of input text. - - The '$config["clean_ms_char"]' parameter should not be used if authors do not copy-paste Microsoft-created text, or if the input text is not believed to use the 'Windows 1252' ('Cp-1252') or a similar encoding like 'Cp-1251'. 
Further, the input form and the web-pages displaying it or its content should have the character encoding appropriately marked-up. - - --- 3.2 Character references/entities ------------------------------o - - - Valid character entities take the form '&*;' where '*' is '#x' followed by a hexadecimal number (hexadecimal numeric entity; like '&#xa0;' for non-breaking space), or alphanumeric like 'gt' (external or named entity; like '&nbsp;' for non-breaking space), or '#' followed by a number (decimal numeric entity; like '&#160;' for non-breaking space). Character entities referring to the soft-hyphen character (the '&shy;' or '\xad' character; hexadecimal code-point 'ad' [decimal '173']) in URL-accepting attribute values are always replaced with spaces; soft-hyphens in attribute values introduce vulnerabilities in some older versions of the Opera and Mozilla [Firefox] browsers. - - htmLawed (function 'hl_ent()'): - - * Neutralizes entities with multiple leading zeroes or missing semi-colons (potentially dangerous) - - * Lowercases the 'X' (for XML-compliance) and 'A-F' of hexadecimal numeric entities - - * Neutralizes entities referring to characters that are HTML-invalid (see section:- #3.1) - - * Neutralizes entities referring to characters that are HTML-discouraged (code-points, hexadecimally, '7f' to '84', '86' to '9f', and 'fdd0' to 'fddf', or decimally, '127' to '132', '134' to '159', and '64976' to '64991'). Entities referring to the remaining discouraged characters (see section:- #5.1 for a full list) are let through. - - * Neutralizes named entities that are not in the specs. - - * Optionally converts valid HTML-specific named entities except '&gt;', '&lt;', '&quot;', and '&amp;' to decimal numeric ones (hexadecimal if $config["hexdec_entity"] is '2') for generic XML-compliance. For this, '$config["named_entity"]' should be '1'. - - * Optionally converts hexadecimal numeric entities to the more widely supported decimal ones. For this, '$config["hexdec_entity"]' should be '0'. 
- - * Optionally converts decimal numeric entities to the hexadecimal ones. For this, '$config["hexdec_entity"]' should be '2'. - - `Neutralization` refers to the `entitification` of '&' to '&amp;'. - - *Note*: htmLawed does not convert entities to the actual characters represented by them; one can pass the htmLawed output through PHP's 'html_entity_decode' function:- http://www.php.net/html_entity_decode for that. - - *Note*: If '$config["and_mark"]' is set, and set to a value other than '0', then the '&' characters in the original input are replaced with the control character for the hexadecimal code-point '6' ('\x06'; '&' characters introduced by htmLawed, e.g., after converting '<' to '&lt;', are not affected). This allows one to distinguish, say, an '&gt;' introduced by htmLawed and an '&gt;' put in by the input writer, and can be helpful in further processing of the htmLawed-processed text (e.g., to identify the character sequence 'o(&gt;&lt;)o' to generate an emoticon image). When this feature is active, admins should ensure that the htmLawed output is not directly used in web pages or XML documents as the presence of the '\x06' can break documents. Before use in such documents, and preferably before any storage, any remaining '\x06' should be changed back to '&', e.g., with: - - $final = str_replace("\x06", '&', $prelim); - - Also, see section:- #3.9. - - --- 3.3 HTML elements ----------------------------------------------o - - - htmLawed can be configured to allow only certain HTML elements (tags) in the input. Disallowed elements (just tag-content, and not element-content), based on '$config["keep_bad"]', are either `neutralized` (converted to plain text by entitification of '<' and '>') or removed. - - E.g., with only 'em' permitted: - - Input: - - My website is My website is a.com. - - Output, with '$config["keep_bad"]' not '0': - - My website is <a href="">a.com</a>. - - See section:- #3.3.3 for differences between the various non-zero '$config["keep_bad"]' values. 
- - htmLawed by default permits these 86 elements: - - a, abbr, acronym, address, applet, area, b, bdo, big, blockquote, br, button, caption, center, cite, code, col, colgroup, dd, del, dfn, dir, div, dl, dt, em, embed, fieldset, font, form, h1, h2, h3, h4, h5, h6, hr, i, iframe, img, input, ins, isindex, kbd, label, legend, li, map, menu, noscript, object, ol, optgroup, option, p, param, pre, q, rb, rbc, rp, rt, rtc, ruby, s, samp, script, select, small, span, strike, strong, sub, sup, table, tbody, td, textarea, tfoot, th, thead, tr, tt, u, ul, var - - Except for 'embed' (included because of its wide-spread use) and the Ruby elements ('rb', 'rbc', 'rp', 'rt', 'rtc', 'ruby'; part of XHTML 1.1), these are all the elements in the HTML 4/XHTML 1 specs. Strict-specific specs. exclude 'center', 'dir', 'font', 'isindex', 'menu', 's', 'strike', and 'u'. - - With '$config["safe"] = 1', the default set will exclude 'applet', 'embed', 'iframe', 'object' and 'script'; see section:- #3.6. - - When '$config["elements"]', which specifies allowed elements, is `properly` defined, and neither empty nor set to '0' or '*', the default set is not used. To have elements added to or removed from the default set, a '+/-' notation is used. E.g., '*-script-object' implies that only 'script' and 'object' are disallowed, whereas '*+noembed' means that 'noembed' is also allowed. Elements can also be specified as comma separated names. E.g., 'a, b, i' means only 'a', 'b' and 'i' are permitted. In this notation, '*', '+' and '-' have no significance and can actually cause a mis-reading. 
- - Some more examples of '$config["elements"]' values indicating permitted elements (note that empty spaces are liberally allowed for clarity): - - * 'a, blockquote, code, em, strong' -- only 'a', 'blockquote', 'code', 'em', and 'strong' - * '*-script' -- all excluding 'script' - * '* -center -dir -font -isindex -menu -s -strike -u' -- only XHTML-Strict elements - * '*+noembed-script' -- all including 'noembed' excluding 'script' - - Some mis-usages (and the resulting permitted elements) that can be avoided: - - * '-*' -- none; instead of htmLawed, one might just use, e.g., the 'htmlspecialchars()' PHP function - * '*, -script' -- all except 'script'; admin probably meant '*-script' - * '-*, a, em, strong' -- all; admin probably meant 'a, em, strong' - * '*' -- all; admin need not have set 'elements' - * '*-form+form' -- all; a '+' will always over-ride any '-' - * '*, noembed' -- only 'noembed'; admin probably meant '*+noembed' - * 'a, +b, i' -- only 'a' and 'i'; admin probably meant 'a, b, i' - - Basically, when using the '+/-' notation, commas (',') should not be used, and vice versa, and '*' should be used with the former but not the latter. - - *Note*: Even if an element that is not in the default set is allowed through '$config["elements"]', like 'noembed' in the last example, it will eventually be removed during tag balancing unless such balancing is turned off ('$config["balance"]' set to '0'). Currently, the only way around this, which actually is simple, is to edit the various arrays in the function 'hl_bal()' to accommodate the element and its nesting properties. - - *A possibly second way to specify allowed elements* is to set '$config["parent"]' to an element name that supposedly will hold the input, and to set '$config["balance"]' to '1'. During tag balancing (see section:- #3.3.3), all elements that cannot legally nest inside the parent element will be removed. 
The parent element is auto-reset to 'div' if '$config["parent"]' is empty, 'body', or an element not in htmLawed's default set of 86 elements. - - `Tag transformation` is possible for improving XHTML-Strict compliance -- most of the deprecated elements are removed or converted to valid XHTML-Strict ones; see section:- #3.3.2. - - -.. 3.3.1 Handling of comments and CDATA sections ................... - - - 'CDATA' sections have the format '"...]]>', and HTML comments, '"... -->'. Neither HTML comments nor 'CDATA' sections can reside inside tags. HTML comments can exist anywhere else, but 'CDATA' sections can exist only where plain text is allowed (e.g., immediately inside 'td' element content but not immediately inside 'tr' element content). - - htmLawed (function 'hl_cmtcd()') handles HTML comments or 'CDATA' sections depending on the values of '$config["comment"]' or '$config["cdata"]'. If '0', such markup is not looked for and the text is processed like plain text. If '1', it is removed completely. If '2', it is preserved but any '<', '>' and '&' inside are changed to entities. If '3', they are left as such. - - Note that for the last two cases, HTML comments and 'CDATA' sections will always be removed from tag content (function 'hl_tag()'). - - Examples: - - Input: - Home - Output ('$config["comment"] = 0, $config["cdata"] = 2'): - <-- home link -->Home - Output ('$config["comment"] = 1, $config["cdata"] = 2'): - Home - Output ('$config["comment"] = 2, $config["cdata"] = 2'): - Home - Output ('$config["comment"] = 2, $config["cdata"] = 1'): - Home - Output ('$config["comment"] = 3, $config["cdata"] = 3'): - Home - - For standard-compliance, comments are given the form '', and any '--' in the content is made '-'. - - When '$config["safe"] = 1', CDATA sections and comments are considered plain text unless '$config["comment"]' or '$config["cdata"]' is explicitly specified; see section:- #3.6. - - -.. 
3.3.2 Tag-transformation for better XHTML-Strict ................o - - - If '$config["make_tag_strict"]' is set and not '0', following non-XHTML-Strict elements (and attributes), even if admin-permitted, are mutated as indicated (element content remains intact; function 'hl_tag2()'): - - * applet - (based on '$config["make_tag_strict"]', unchanged ('1') or removed ('2')) - * center - 'div style="text-align: center;"' - * dir - 'ul' - * embed - (based on '$config["make_tag_strict"]', unchanged ('1') or removed ('2')) - * font (face, size, color) - 'span style="font-family: ; font-size: ; color: ;"' (size transformation reference:- http://style.cleverchimp.com/font_size_intervals/altintervals.html) - * isindex - (based on '$config["make_tag_strict"]', unchanged ('1') or removed ('2')) - * menu - 'ul' - * s - 'span style="text-decoration: line-through;"' - * strike - 'span style="text-decoration: line-through;"' - * u - 'span style="text-decoration: underline;"' - - For an element with a pre-existing 'style' attribute value, the extra style properties are appended. - - Example input: - -
    - The PHP software script used for this web-page web-page is htmLawedTest.php, from PHP Labware. -
    - - The output: - -
    - The PHP software script used for this web-page web-page is htmLawedTest.php, from PHP Labware. -
    - - --- 3.3.3 Tag balancing and proper nesting -------------------------o - - - If '$config["balance"]' is set to '1', htmLawed (function 'hl_bal()') checks and corrects the input to have properly balanced tags and legal element content (i.e., any element nesting should be valid, and plain text may be present only in the content of elements that allow them). - - Depending on the value of '$config["keep_bad"]' (see section:- #2.2 and section:- #3.3), illegal content may be removed or neutralized to plain text by converting < and > to entities: - - '0' - remove; this option is available only to maintain Kses-compatibility and should not be used otherwise (see section:- #2.6) - '1' - neutralize tags and keep element content - '2' - remove tags but keep element content - '3' and '4' - like '1' and '2', but keep element content only if text ('pcdata') is valid in parent element as per specs - '5' and '6' - like '3' and '4', but line-breaks, tabs and spaces are left - - Example input (disallowing the 'p' element): - - <*> Pseudo-tags <*> - Non-HTML tag xml -

    - Disallowed tag p -

    -
      Bad
    • OK
    - - The output with '$config["keep_bad"] = 1': - - <*> Pseudo-tags <*> - <xml>Non-HTML tag xml</xml> - <p> - Disallowed tag p - </p> -
      Bad
    • OK
    - - The output with '$config["keep_bad"] = 3': - - <*> Pseudo-tags <*> - <xml>Non-HTML tag xml</xml> - <p> - Disallowed tag p - </p> -
    • OK
    - - The output with '$config["keep_bad"] = 6': - - <*> Pseudo-tags <*> - Non-HTML tag xml - - Disallowed tag p - -
    • OK
    - - An option like '1' is useful, e.g., when a writer previews his submission, whereas one like '3' is useful before content is finalized and made available to all. - - *Note:* In the example above, unlike '<*>', '' gets considered as a tag (even though there is no HTML element named 'xml'). In general, text matching the regular expression pattern '<(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>' is considered a tag (phrase enclosed by the angled brackets '<' and '>', and starting [with an optional slash preceding] with an alphanumeric word that starts with an alphabet...). - - Nesting/content rules for each of the 86 elements in htmLawed's default set (see section:- #3.3) are defined in function 'hl_bal()'. This means that if a non-standard element besides 'embed' is being permitted through '$config["elements"]', the element's tag content will end up getting removed if '$config["balance"]' is set to '1'. - - Plain text and/or certain elements nested inside 'blockquote', 'form', 'map' and 'noscript' need to be in block-level elements. This point is often missed during manual writing of HTML code. htmLawed attempts to address this during balancing. E.g., if the parent container is set as 'form', the input 'B:C:' is converted to '
    B:C:
    '. - - --- 3.3.4 Elements requiring child elements ------------------------o - - - As per specs, the following elements require legal child elements nested inside them: - - blockquote, dir, dl, form, map, menu, noscript, ol, optgroup, rbc, rtc, ruby, select, table, tbody, tfoot, thead, tr, ul - - In some cases, the specs stipulate the number and/or the ordering of the child elements. A 'table' can have 0 or 1 'caption', 'tbody', 'tfoot', and 'thead', but they must be in this order: 'caption', 'thead', 'tfoot', 'tbody'. - - htmLawed currently does not check for conformance to these rules. Note that any non-compliance in this regard will not introduce security vulnerabilities, crash browser applications, or affect the rendering of web-pages. - - With '$config["direct_list_nest"]' set to '1', htmLawed will allow direct nesting of an 'ol' or 'ul' list within another 'ol' or 'ul' without requiring the child list to be within an 'li' of the parent list. While this is not standard-compliant, directly nested lists are rendered properly by almost all browsers. The parameter '$config["direct_list_nest"]' has no effect if tag-balancing (section:- #3.3.3) is turned off. - - --- 3.3.5 Beautify or compact HTML ---------------------------------o - - - By default, htmLawed will neither `beautify` HTML code by formatting it with indentations, etc., nor will it make it compact by removing un-needed white-space. (It does always properly white-space tag content.) - - As per the HTML standards, spaces, tabs and line-breaks in web-pages (except those inside 'pre' elements) are all considered equivalent, and referred to as `white-spaces`. Browser applications are supposed to consider contiguous white-spaces as just a single space, and to disregard white-spaces trailing opening tags or preceding closing tags. This white-space `normalization` allows the use of text/code beautifully formatted with indentations and line-spacings for readability. 
Such `pretty` HTML can, however, increase the size of web-pages, or make the extraction or scraping of plain text cumbersome. - - With the '$config' parameter 'tidy', htmLawed can be used to beautify or compact the input text. Input with just plain text and no HTML markup is also subject to this. Besides 'pre', the 'script' and 'textarea' elements, CDATA sections, and HTML comments are not subjected to the tidying process. - - To `compact`, use '$config["tidy"] = -1'; single instances or runs of white-spaces are replaced with a single space, and white-spaces trailing and leading open and closing tags, respectively, are removed. - - To `beautify`, '$config["tidy"]' is set as '1', or for customized tidying, as a string like '2s2n'. The 's' or 't' character specifies the use of spaces or tabs for indentation. The first and third characters, any of the digits 0-9, specify the number of spaces or tabs per indentation, and any parental lead spacing (extra indenting of the whole block of input text). The 'r' and 'n' characters are used to specify line-break characters: 'n' for '\n' (Unix/Mac OS X line-breaks), 'rn' or 'nr' for '\r\n' (Windows/DOS line-breaks), or 'r' for '\r'. - - The '$config["tidy"]' value of '1' is equivalent to '2s0n'. Other '$config["tidy"]' values are read loosely: a value of '4' is equivalent to '4s0n'; 't2', to '1t2n'; 's', to '2s0n'; '2TR', to '2t0r'; 'T1', to '1t1n'; 'nr3', to '3s0nr', and so on. Except in the indentations and line-spacings, runs of white-spaces are replaced with a single space during beautification. - - Input formatting using '$config["tidy"]' is not recommended when input text has mixed markup (like HTML + PHP). - - --- 3.4 Attributes ------------------------------------------------oo - - - htmLawed will only permit attributes described in the HTML specs (including deprecated ones). 
It also permits some attributes for use with the 'embed' element (the non-standard 'embed' element is supported in htmLawed because of its widespread use), and the 'xml:space' attribute (valid only in XHTML 1.1). A list of such 111 attributes and the elements they are allowed in is in section:- #5.2. - - When '$config["deny_attribute"]' is not set, or set to '0', or empty ('""'), all the 111 attributes are permitted. Otherwise, '$config["deny_attribute"]' can be set as a list of comma-separated names of the denied attributes. 'on*' can be used to refer to the group of potentially dangerous, script-accepting attributes: 'onblur', 'onchange', 'onclick', 'ondblclick', 'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onreset', 'onselect' and 'onsubmit'. - - Note that attributes specified in '$config["deny_attribute"]' are denied globally, for all elements. To deny attributes for only specific elements, '$spec' (see section:- #2.3) can be used. '$spec' can also be used to element-specifically permit an attribute otherwise denied through '$config["deny_attribute"]'. - - With '$config["safe"] = 1' (section:- #3.6), the 'on*' attributes are automatically disallowed. - - *Note*: To deny all but a few attributes globally, a simpler way to specify '$config["deny_attribute"]' would be to use the notation '* -attribute1 -attribute2 ...'. Thus, a value of '* -title -href' implies that except 'href' and 'title' (where allowed as per standards) all other attributes are to be removed. With this notation, the value for the parameter 'safe' (section:- #3.6) will have no effect on 'deny_attribute'.
- - htmLawed (function 'hl_tag()') also: - - * Lower-cases attribute names - * Removes duplicate attributes (last one stays) - * Gives attributes the form 'name="value"' and single-spaces them, removing unnecessary white-spacing - * Provides `required` attributes (see section:- #3.4.1) - * Double-quotes values and escapes any '"' inside them - * Replaces the possibly dangerous soft-hyphen characters (hexadecimal code-point 'ad') in the values with spaces - * Allows custom function to additionally filter/modify attribute values (see section:- #3.4.9) - - -.. 3.4.1 Auto-addition of XHTML-required attributes ................ - - - If indicated attributes for the following elements are found missing, htmLawed (function 'hl_tag()') will add them (with values same as attribute names unless indicated otherwise below): - - * area - alt ('area') - * area, img - src, alt ('image') - * bdo - dir ('ltr') - * form - action - * map - name - * optgroup - label - * param - name - * script - type ('text/javascript') - * textarea - rows ('10'), cols ('50') - - Additionally, with '$config["xml:lang"]' set to '1' or '2', if the 'lang' but not the 'xml:lang' attribute is declared, then the latter is added too, with a value copied from that of 'lang'. This is for better standard-compliance. With '$config["xml:lang"]' set to '2', the 'lang' attribute is removed (XHTML 1.1 specs). - - Note that the 'name' attribute for 'map', invalid in XHTML 1.1, is also transformed if required -- see section:- #3.4.6. - - -.. 3.4.2 Duplicate/invalid 'id' values ............................o - - - If '$config["unique_ids"]' is '1', htmLawed (function 'hl_tag()') removes 'id' attributes with values that are not XHTML-compliant (must begin with a letter and can contain letters, digits, ':', '.', '-' and '_') or duplicate. If '$config["unique_ids"]' is a word, any duplicate but otherwise valid value will be appropriately prefixed with the word to ensure its uniqueness. 
The word should begin with a letter and should contain only letters, numbers, ':', '.', '_' and '-'. - - Even if multiple inputs need to be filtered (through multiple calls to htmLawed), htmLawed ensures uniqueness of 'id' values as it uses a global variable ('$GLOBALS["hl_Ids"]' array). Further, an admin can restrict the use of certain 'id' values by presetting this variable before htmLawed is called into use. E.g.: - - $GLOBALS['hl_Ids'] = array('top'=>1, 'bottom'=>1, 'myform'=>1); // id values not allowed in input - $processed = htmLawed($text); // filter input - - -.. 3.4.3 URL schemes (protocols) and scripts in attribute values ............o - - - htmLawed edits attributes that take URLs as values if they are found to contain un-permitted schemes. E.g., if the 'afp' scheme is not permitted, then '' becomes '', and if Javascript is not permitted '' becomes ''. - - By default htmLawed permits these schemes in URLs for the 'href' attribute: - - aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet - - Also, only 'file', 'http' and 'https' are permitted in attributes whose names start with 'o' (like 'onmouseover'), and in these attributes that accept URLs: - - action, cite, classid, codebase, data, href, longdesc, model, pluginspage, pluginurl, src, style, usemap - - These default sets are used when '$config["schemes"]' is not set (see section:- #2.2). To over-ride the defaults, '$config["schemes"]' is defined as a string of semi-colon-separated sub-strings of type 'attribute: comma-separated schemes'. E.g., 'href: mailto, http, https; onclick: javascript; src: http, https'. For unspecified attributes, 'file', 'http' and 'https' are permitted. This can be changed by passing schemes for '*' in '$config["schemes"]'. E.g., 'href: mailto, http, https; *: http, https'. - - '*' can be put in the list of schemes to permit all protocols. E.g., 'style: *; img: http, https' results in protocols not being checked in 'style' attribute values.
However, in such cases, any relative-to-absolute URL conversion, or vice versa, (section:- #3.4.4) is not done. - - Thus, `to allow Javascript`, one can set '$config["schemes"]' as 'href: mailto, http, https; *: http, https, javascript', or 'href: mailto, http, https, javascript; *: http, https, javascript', or '*: *', and so on. - - As a side-note, one may find 'style: *' useful as URLs in 'style' attributes can be specified in a variety of ways, and the patterns that htmLawed uses to identify URLs may mistakenly identify non-URL text. - - '!' can be put in the list of schemes to disallow all protocols as well as `local` URLs. Thus, with 'href: http, style: !', 'CNN' will become 'CNN'. - - *Note*: If URL-accepting attributes other than those listed above are being allowed, then the scheme will not be checked unless the attribute name contains the string 'src' (e.g., 'dynsrc') or starts with 'o' (e.g., 'onbeforecopy'). - - With '$config["safe"] = 1', all URLs are disallowed in the 'style' attribute values. - - -.. 3.4.4 Absolute & relative URLs in attribute values .............o - - - htmLawed can make absolute URLs in attributes like 'href' relative ('$config["abs_url"]' is '-1'), and vice versa ('$config["abs_url"]' is '1'). URLs in scripts are not considered for this, and so are URLs like '#section_6' (fragment), '?name=Tim#show' (starting with query string), and ';var=1?name=Tim#show' (starting with parameters). Further, this requires that '$config["base_url"]' be set properly, with the '://' and a trailing slash ('/'), with no query string, etc. E.g., 'file:///D:/page/', 'https://abc.com/x/y/', or 'http://localhost/demo/' are okay, but 'file:///D:/page/?help=1', 'abc.com/x/y/' and 'http://localhost/demo/index.htm' are not. - - For making absolute URLs relative, only those URLs that have the '$config["base_url"]' string at the beginning are converted. 
E.g., with '$config["base_url"] = "https://abc.com/x/y/"', 'https://abc.com/x/y/a.gif' and 'https://abc.com/x/y/z/b.gif' become 'a.gif' and 'z/b.gif' respectively, while 'https://abc.com/x/c.gif' is not changed. - - When making relative URLs absolute, only values for scheme, network location (host-name) and path values in the base URL are inherited. See section:- #5.5 for more about the URL specification as per RFC 1808:- http://www.ietf.org/rfc/rfc1808.txt. - - -.. 3.4.5 Lower-cased, standard attribute values ....................o - - - Optionally, for standard-compliance, htmLawed (function 'hl_tag()') lower-cases standard attribute values to give, e.g., 'input type="password"' instead of 'input type="Password"', if '$config["lc_std_val"]' is '1'. Attribute values matching those listed below for any of the elements (plus those for the 'type' attribute of 'button' or 'input') are lower-cased: - - all, baseline, bottom, button, center, char, checkbox, circle, col, colgroup, cols, data, default, file, get, groups, hidden, image, justify, left, ltr, middle, none, object, password, poly, post, preserve, radio, rect, ref, reset, right, row, rowgroup, rows, rtl, submit, text, top - - a, area, bdo, button, col, form, img, input, object, option, optgroup, param, script, select, table, td, tfoot, th, thead, tr, xml:space - - The following `empty` (`minimized`) attributes are always assigned lower-cased values (same as the names): - - checked, compact, declare, defer, disabled, ismap, multiple, nohref, noresize, noshade, nowrap, readonly, selected - - -.. 3.4.6 Transformation of deprecated attributes ..................o - - - If '$config["no_deprecated_attr"]' is not '0', then deprecated attributes (see appendix in section:- #5.2) are removed and, in most cases, their values are transformed to CSS style properties and added to the 'style' attributes (function 'hl_tag()').
Except for 'bordercolor' for 'table', 'tr' and 'td', the scores of proprietary attributes that were never part of any cross-browser standard are not supported. - - *Note*: The attribute 'target' for 'a' is allowed even though it is not in XHTML 1.0 specs. This is because of the attribute's wide-spread use and browser-support, and because the attribute is valid in XHTML 1.1 onwards. - - * align - for 'img' with value of 'left' or 'right', becomes, e.g., 'float: left'; for 'div' and 'table' with value 'center', becomes 'margin: auto'; all others become, e.g., 'text-align: right' - - * bgcolor - E.g., 'bgcolor="#ffffff"' becomes 'background-color: #ffffff' - * border - E.g., 'border="10"' becomes 'border: 10px' - * bordercolor - E.g., 'bordercolor=#999999' becomes 'border-color: #999999;' - * compact - 'font-size: 85%' - * clear - E.g., 'clear="all"' becomes 'clear: both' - - * height - E.g., 'height= "10"' becomes 'height: 10px' and 'height="*"' becomes 'height: auto' - - * hspace - E.g., 'hspace="10"' becomes 'margin-left: 10px; margin-right: 10px' - * language - 'language="VBScript"' becomes 'type="text/vbscript"' - * name - E.g., 'name="xx"' becomes 'id="xx"' - * noshade - 'border-style: none; border: 0; background-color: gray; color: gray' - * nowrap - 'white-space: nowrap' - * size - E.g., 'size="10"' becomes 'height: 10px' - * start - removed - * type - E.g., 'type="i"' becomes 'list-style-type: lower-roman' - * value - removed - * vspace - E.g., 'vspace="10"' becomes 'margin-top: 10px; margin-bottom: 10px' - * width - like 'height' - - Example input: - - imageimage -
    -
    - image - - - - - -
    -
    -

    Section

    -

    Para

    -
    1. First item
    -
    -
    -
    1. First item
    -
    -
    - - And the output with '$config["no_deprecated_attr"] = 1': - - imageimage -
    -
    - image - - - - - -
    -
    -

    Section

    -

    Para

    -
    1. First item
    -
    -
    -
    1. First item
    -
    -
    - - For 'lang', deprecated in XHTML 1.1, transformation is taken care of through '$config["xml:lang"]'; see section:- #3.4.1. - - The attribute 'name' is deprecated in 'form', 'iframe', and 'img', and is replaced with 'id' if an 'id' attribute doesn't exist and if the 'name' value is appropriate for 'id'. For such replacements for 'a' and 'map', for which the 'name' attribute is deprecated in XHTML 1.1, '$config["no_deprecated_attr"]' should be set to '2' (when set to '1', for these two elements, the 'name' attribute is retained). - - --- 3.4.7 Anti-spam & 'href' ---------------------------------------o - - - htmLawed (function 'hl_tag()') can check the 'href' attribute values (link addresses) as an anti-spam (email or link spam) measure. - - If '$config["anti_mail_spam"]' is not '0', the '@' of email addresses in 'href' values like 'mailto:a@b.com' is replaced with text specified by '$config["anti_mail_spam"]'. The text should be of a form that makes it clear to others that the address needs to be edited before a mail is sent; e.g., '@' (makes the example address 'a@b.com'). - - For regular links, one can choose to have a 'rel' attribute with 'nofollow' in its value (which tells some search engines to not follow a link). This can discourage link spammers. Additionally, or as an alternative, one can choose to empty the 'href' value altogether (disable the link). - - For use of these options, '$config["anti_link_spam"]' should be set as an array with values 'regex1' and 'regex2', both or one of which can be empty (like 'array("", "regex2")') to indicate that that option is not to be used. Otherwise, 'regex1' or 'regex2' should be PHP- and PCRE-compatible regular expression patterns: 'href' values will be matched against them and those matching the pattern will accordingly be treated. - - Note that the regular expressions should have `delimiters`, and be well-formed and preferably fast. Absolute efficiency/accuracy is often not needed. 
- - An example, to have a 'rel' attribute with 'nofollow' for all links, and to disable links that do not point to domains 'abc.com' and 'xyz.org': - - $config["anti_link_spam"] = array('`.`', '`://\W*(?!(abc\.com|xyz\.org))`'); - - --- 3.4.8 Inline style properties ----------------------------------o - - - htmLawed can check URL schemes and dynamic expressions (to guard against Javascript, etc., script-based insecurities) in inline CSS style property values in the 'style' attributes. (CSS properties like 'background-image' that accept URLs in their values are noted in section:- #5.3.) Dynamic CSS expressions that allow scripting in the IE browser, and can be a vulnerability, can be removed from property values by setting '$config["css_expression"]' to '1' (default setting). Note that when '$config["css_expression"]' is set to '1', htmLawed will remove '/*' from the 'style' values. - - *Note*: Because of the various ways of representing characters in attribute values (URL-escapement, entitification, etc.), htmLawed might alter the values of the 'style' attribute values, and may even falsely identify dynamic CSS expressions and URL schemes in them. If this is an important issue, checking of URLs and dynamic expressions can be turned off ('$config["schemes"] = "...style:*..."', see section:- #3.4.3, and '$config["css_expression"] = 0'). Alternately, admins can use their own custom function for finer handling of 'style' values through the 'hook_tag' parameter (see section:- #3.4.9). - - It is also possible to have htmLawed let through any 'style' value by setting '$config["style_pass"]' to '1'. - - As such, it is better to set up a CSS file with class declarations, disallow the 'style' attribute, set a '$spec' rule (see section:- #2.3) for 'class' for the 'oneof' or 'match' parameter, and ask writers to make use of the 'class' attribute. 
- - --- 3.4.9 Hook function for tag content ----------------------------o - - - It is possible to utilize a custom hook function to alter the tag content htmLawed has finalized (i.e., after it has checked/corrected for required attributes, transformed attributes, lower-cased attribute names, etc.). - - When '$config' parameter 'hook_tag' is set to the name of a function, htmLawed (function 'hl_tag()') will pass on the element name, and, in the case of an opening tag, the `finalized` attribute name-value pairs as array elements to the function. The function, after completing a task such as filtering or tag transformation, will typically return an empty string, the full opening tag string like '' (for empty elements like 'img' and 'input', the element-closing slash '/' should also be included), etc. - - Any 'hook_tag' function, since htmLawed version 1.1.11, also receives names of elements in closing tags, such as 'a' in the closing '' tag of the element 'CNN'. Unlike for opening tags, no other value (i.e., the attribute name-value array) is passed to the function since a closing tag contains only element names. Typically, the function will return an empty string or a full closing tag (like ''). - - This is a *powerful functionality* that can be exploited for various objectives: consolidate-and-convert inline 'style' attributes to 'class', convert 'embed' elements to 'object', permit only one 'caption' element in a 'table' element, disallow embedding of certain types of media, *inject HTML*, use CSSTidy:- http://csstidy.sourceforge.net to sanitize 'style' attribute values, etc. 
- - As an example, the custom hook code below can be used to force a series of specifically ordered 'id' attributes on all elements, and a specific 'param' element inside all 'object' elements: - - function my_tag_function($element, $attribute_array=0){ - - // If second argument is not received, it means a closing tag is being handled - if(is_numeric($attribute_array)){ - return ""; - } - - static $id = 0; - // Remove any duplicate element - if($element == 'param' && isset($attribute_array['allowscriptaccess'])){ - return ''; - } - - $new_element = ''; - - // Force a serialized ID number - $attribute_array['id'] = 'my_'. $id; - ++$id; - - // Inject param for allowscriptaccess - if($element == 'object'){ - $new_element = ''; - ++$id; - } - - $string = ''; - foreach($attribute_array as $k=>$v){ - $string .= " {$k}=\"{$v}\""; - } - - static $empty_elements = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); - - return "<{$element}{$string}". (isset($empty_elements[$element]) ? ' /' : ''). '>'. $new_element; - } - - The 'hook_tag' parameter is different from the 'hook' parameter (section:- #3.7). - - Snippets of hook function code developed by others may be available on the htmLawed:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed website. - - --- 3.5 Simple configuration directive for most valid XHTML -------oo - - - If '$config["valid_xhtml"]' is set to '1', some relevant '$config' parameters (indicated by '~' in section:- #2.2) are auto-adjusted. This allows one to pass the '$config' argument with a simpler value. If a value for a parameter auto-set through 'valid_xhtml' is still manually provided, then that value will over-ride the auto-set value.
- - --- 3.6 Simple configuration directive for most `safe` HTML --------o - - - `Safe` HTML refers to HTML that is restricted to reduce the vulnerability for scripting attacks (such as XSS) based on HTML code which otherwise may still be legal and compliant with the HTML standard specs. When elements such as 'script' and 'object', and attributes such as 'onmouseover' and 'style' are allowed in the input text, an input writer can introduce malevolent HTML code. Note that what is considered 'safe' depends on the nature of the web application and the trust-level accorded to its users. - - htmLawed allows an admin to use '$config["safe"]' to auto-adjust multiple '$config' parameters (such as 'elements' which declares the allowed element-set), which otherwise would have to be manually set. The relevant parameters are indicated by '"' in section:- #2.2. Thus, one can pass the '$config' argument with a simpler value. - - With the value of '1', htmLawed considers 'CDATA' sections and HTML comments as plain text, and prohibits the 'applet', 'embed', 'iframe', 'object' and 'script' elements, and the 'on*' attributes like 'onclick'. (There are '$config' parameters like 'css_expression' that are not affected by the value set for 'safe' but whose default values still contribute towards a more `safe` output.) Further, URLs with schemes (see section:- #3.4.3) are neutralized so that, e.g., 'style="moz-binding:url(http://danger)"' becomes 'style="moz-binding:url(denied:http://danger)"'. - - Admins, however, may still want to completely deny the 'style' attribute, e.g., with code like - - $processed = htmLawed($text, array('safe'=>1, 'deny_attribute'=>'style')); - - Permitting the 'style' attribute brings in risks of `click-jacking`, etc. CSS property values can render a page non-functional or be used to deface it. Except for URLs, dynamic expressions, and some other things, htmLawed does not completely check 'style' values.
It does provide ways for the code-developer implementing htmLawed to do such checks through the '$spec' argument, and through the 'hook_tag' parameter (see section:- #3.4.8 for more). Disallowing style completely and relying on CSS classes and stylesheet files is recommended. - - If a value for a parameter auto-set through 'safe' is still manually provided, then that value can over-ride the auto-set value. E.g., with '$config["safe"] = 1' and '$config["elements"] = "*+script"', 'script', but not 'applet', is allowed. - - A page illustrating the efficacy of htmLawed's anti-XSS abilities with 'safe' set to '1' against XSS vectors listed by RSnake:- http://ha.ckers.org/xss.html may be available here:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/rsnake/RSnakeXSSTest.htm. - - --- 3.7 Using a hook function --------------------------------------o - - - If '$config["hook"]' is not set to '0', then htmLawed will allow preliminarily processed input to be altered by a hook function named by '$config["hook"]' before starting the main work (but after handling of characters, entities, HTML comments and 'CDATA' sections -- see code for function 'htmLawed()'). - - The hook function also allows one to alter the `finalized` values of '$config' and '$spec'. - - Note that the 'hook' parameter is different from the 'hook_tag' parameter (section:- #3.4.9). - - Snippets of hook function code developed by others may be available on the htmLawed:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed website. - - --- 3.8 Obtaining `finalized` parameter values ---------------------o - - - htmLawed can assign the `finalized` '$config' and '$spec' values to a variable named by '$config["show_setting"]'. 
The variable, made global by htmLawed, is set as an array with three keys: 'config', with the '$config' value, 'spec', with the '$spec' value, and 'time', with a value that is the Unix time (the output of PHP's 'microtime()' function) when the value was assigned. Admins should use a PHP-compliant variable name (e.g., one that does not begin with a numerical digit) that does not conflict with variable names in their non-htmLawed code. - - The values, which are also post-hook function (if any), can be used to auto-generate information (on, e.g., the elements that are permitted) for input writers. - - --- 3.9 Retaining non-HTML tags in input with mixed markup ---------o - - - htmLawed does not remove certain characters that, though not invalid, are nevertheless discouraged in HTML documents as per the specs (see section:- #5.1). This can be utilized to deal with input that contains mixed markup. Input that may have HTML markup as well as some other markup that is based on the '<', '>' and '&' characters is considered to have mixed markup. The non-HTML markup can be rather proprietary (like markup for emoticons/smileys), or standard (like MathML or SVG). Or it can be programming code meant for execution/evaluation (such as embedded PHP code). - - To deal with such mixed markup, the input text can be pre-processed to hide the non-HTML markup by specifically replacing the '<', '>' and '&' characters with some of the HTML-discouraged characters (see section:- #3.1.2). Post-htmLawed processing, the replacements are reverted. - - An example (mixed HTML and PHP code in input text): - - $text = preg_replace('`<\?php(.+?)\?>`sm', "\x83?php\\1?\x84", $text); - $processed = htmLawed($text); - $processed = preg_replace('`\x83\?php(.+?)\?\x84`sm', '<?php\\1?>', $processed); - - This code will not work if '$config["clean_ms_char"]' is set to '1' (section:- #3.1), in which case one should instead deploy a hook function (section:- #3.7).
(htmLawed internally uses certain control characters, code-points '1' to '7', and use of these characters as markers in the logic of hook functions may cause issues.) - - Admins may also be able to use '$config["and_mark"]' to deal with such mixed markup; see section:- #3.2. - - -== 4 Other =======================================================oo - - --- 4.1 Support ----------------------------------------------------- - - - A careful re-reading of this documentation will very likely answer your questions. - - Software updates and forum-based community-support may be found at http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed. For general PHP issues (not htmLawed-specific), support may be found through internet searches and at http://php.net. - - --- 4.2 Known issues -----------------------------------------------o - - - See section:- #2.8. - - Readers are advised to cross-check information given in this document. - - --- 4.3 Change-log -------------------------------------------------o - - - (The release date for the downloadable package of files containing documentation, demo script, test-cases, etc., besides the 'htmLawed.php' file may be updated independently if the secondary files are revised.) - - `Version number - Release date. Notes` - - 1.1.11 - 5 June 2012. Fix for possible problem with handling of multi-byte characters in attribute values in an mbstring.func_overload environment. '$config["hook_tag"]', if specified, now receives names of elements in closing tags. - - 1.1.10 - 22 October 2011. Fix for a bug in the 'tidy' functionality that caused the entire input to be replaced with a single space; new parameter, '$config["direct_list_nest"]' to allow direct descent of a list in a list. (5 April 2012. Dual licensing from LGPLv3 to LGPLv3 and GPLv2+.) - - 1.1.9.5 - 6 July 2011. Minor correction of a rule for nesting of 'li' within 'dir' - - 1.1.9.4 - 3 July 2010. Parameter 'schemes' now accepts '!'
so any URL, even a local one, can be `denied`. An issue in which a second URL value in 'style' properties was not checked was fixed. - - 1.1.9.3 - 17 May 2010. Checks for correct nesting of 'param' - - 1.1.9.2 - 26 April 2010. Minor fix regarding rendering of denied URL schemes - - 1.1.9.1 - 26 February 2010. htmLawed now uses the LGPL version 3 license; support for 'flashvars' attribute for 'embed' - - 1.1.9 - 22 December 2009. Soft-hyphens are now removed only from URL-accepting attribute values - - 1.1.8.1 - 16 July 2009. Minor code-change to fix a PHP error notice - - 1.1.8 - 23 April 2009. Parameter 'deny_attribute' now accepts the wild-card '*', making it simpler to specify its value when all but a few attributes are being denied; fixed a bug in interpreting '$spec' - - 1.1.7 - 11-12 March 2009. Attributes globally denied through 'deny_attribute' can be allowed element-specifically through '$spec'; '$config["style_pass"]', which lets through any 'style' value, introduced; altered logic to catch certain types of dynamically crafted CSS expressions - - 1.1.3-6 - 28-31 January - 4 February 2009. Altered logic to catch certain types of dynamically crafted CSS expressions - - 1.1.2 - 22 January 2009. Fixed bug in parsing of 'font' attributes during tag transformation - - 1.1.1 - 27 September 2008. Better nesting correction when omittable closing tags are absent - - 1.1 - 29 June 2008. '$config["hook_tag"]' and '$config["format"]' introduced for custom tag/attribute check/modification/injection and output compaction/beautification; fixed a regex-in-$spec parsing bug - - 1.0.9 - 11 June 2008. Fixed bug in invalid HTML code-point entity check - - 1.0.8 - 15 May 2008. 'bordercolor' attribute for 'table', 'td' and 'tr' - - 1.0.7 - 1 May 2008. Support for 'wmode' attribute for 'embed'; '$config["show_setting"]' introduced; improved '$config["elements"]' evaluation - - 1.0.6 - 20 April 2008. '$config["and_mark"]' introduced - - 1.0.5 - 12 March 2008.
'style' URL schemes essentially disallowed when $config 'safe' is on; improved regex for CSS expression search - - 1.0.4 - 10 March 2008. Improved corrections for 'blockquote', 'form', 'map' and 'noscript' - - 1.0.3 - 3 March 2008. Character entities for soft-hyphens are now replaced with spaces (instead of being removed); a bug allowing 'td' directly inside 'table' fixed; 'safe' '$config' parameter added - - 1.0.2 - 13 February 2008. Improved implementation of '$config["keep_bad"]' - - 1.0.1 - 7 November 2007. Improved regex for identifying URLs, protocols and dynamic expressions ('hl_tag()' and 'hl_prot()'); no error display with 'hl_regex()' - - 1.0 - 2 November 2007. First release - - --- 4.4 Testing ----------------------------------------------------o - - - To test htmLawed using a form interface, a demo:- htmLawedTest.php web-page is provided with the htmLawed distribution ('htmLawed.php' and 'htmLawedTest.php' should be in the same directory on the web-server). A file with test-cases:- htmLawed_TESTCASE.txt is also provided. - - --- 4.5 Upgrade, & old versions ------------------------------------o - - - Upgrading is as simple as replacing the previous version of 'htmLawed.php' (assuming it was not modified for customized features). As htmLawed output is almost always used in static documents, upgrading should not affect old, finalized content. - - *Important* The following upgrades may affect the functionality of a specific htmLawed as indicated by their corresponding notes: - - (1) From version 1.1-1.1.10 to 1.1.11, if a 'hook_tag' function is in use: In version 1.1.11, elements in closing tags (and not just the opening tags) are also passed to the function. There are no attribute names/values to pass, so a 'hook_tag' function receives only the element name. The 'hook_tag' function therefore may have to be edited. See section:- #3.4.9. - - Old versions of htmLawed may be available online. 
E.g., for version 1.0, check http://www.bioinformatics.org/phplabware/downloads/htmLawed1.zip, for 1.1.1, htmLawed111.zip, and for 1.1.10, htmLawed1110.zip. - - --- 4.6 Comparison with 'HTMLPurifier' -----------------------------o - - - The HTMLPurifier PHP library by Edward Yang is a very good HTML filtering script that uses object oriented PHP code. Compared to htmLawed, it (as of mid-2009): - - * does not support PHP versions older than 5.0 (HTMLPurifier dropped PHP 4 support after version 2) - - * is 15-20 times bigger (scores of files totalling more than 750 kb) - - * consumes 10-15 times more RAM memory (just including the HTMLPurifier files without calling the filter requires a few MBs of memory) - - * is expectedly slower - - * does not allow admins to fully allow all valid HTML (because of incomplete HTML support, it always considers elements like 'script' illegal) - - * lacks many of the extra features of htmLawed (like entity conversions and code compaction/beautification) - - * has poor documentation - - However, HTMLPurifier has finer checks for character encodings and attribute values, and can log warnings and errors. Visit the HTMLPurifier website:- http://htmlpurifier.org for updated information. - - --- 4.7 Use through application plug-ins/modules -------------------o - - - Plug-ins/modules to implement htmLawed in applications such as Drupal and DokuWiki may have been developed. Please check the application websites and the forum on the htmLawed site:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed. - - --- 4.8 Use in non-PHP applications --------------------------------o - - - Non-PHP applications written in Python, Ruby, etc., may be able to use htmLawed through system calls to the PHP engine. Such code may have been documented on the internet. Also check the forum on the htmLawed site:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed. 
- - --- 4.9 Donate -----------------------------------------------------o - - - A donation in any currency and amount to appreciate or support this software can be sent by PayPal:- http://paypal.com to this email address: drpatnaik at yahoo dot com. - - --- 4.10 Acknowledgements ------------------------------------------o - - - Nicholas Alipaz, Bryan Blakey, Pádraic Brady, Ulf Harnhammer, Gareth Heyes, Klaus Leithoff, Lukasz Pilorz, Shelley Powers, Edward Yang, and many anonymous users. - - Thank you! - - -== 5 Appendices ==================================================oo - - --- 5.1 Characters discouraged in XHTML ----------------------------- - - - Characters represented by the following hexadecimal code-points are `not` invalid, even though some validators may issue messages stating otherwise. - - '7f' to '84', '86' to '9f', 'fdd0' to 'fddf', '1fffe', '1ffff', '2fffe', '2ffff', '3fffe', '3ffff', '4fffe', '4ffff', '5fffe', '5ffff', '6fffe', '6ffff', '7fffe', '7ffff', '8fffe', '8ffff', '9fffe', '9ffff', 'afffe', 'affff', 'bfffe', 'bffff', 'cfffe', 'cffff', 'dfffe', 'dffff', 'efffe', 'effff', 'ffffe', 'fffff', '10fffe' and '10ffff' - - --- 5.2 Valid attribute-element combinations -----------------------o - - - Valid attribute-element combinations as per W3C specs. 
- - * includes deprecated attributes (marked '^'), attributes for the non-standard 'embed' element (marked '*'), and the proprietary 'bordercolor' (marked '~') - * only non-frameset, HTML body elements - * 'name' for 'a' and 'map', and 'lang' are invalid in XHTML 1.1 - * 'target' is valid for 'a' in XHTML 1.1 and higher - * 'xml:space' is only for XHTML 1.1 - - abbr - td, th - accept - form, input - accept-charset - form - accesskey - a, area, button, input, label, legend, textarea - action - form - align - caption^, embed, applet, iframe, img^, input^, object^, legend^, table^, hr^, div^, h1^, h2^, h3^, h4^, h5^, h6^, p^, col, colgroup, tbody, td, tfoot, th, thead, tr - alt - applet, area, img, input - archive - applet, object - axis - td, th - bgcolor - embed, table^, tr^, td^, th^ - border - table, img^, object^ - bordercolor~ - table, td, tr - cellpadding - table - cellspacing - table - char - col, colgroup, tbody, td, tfoot, th, thead, tr - charoff - col, colgroup, tbody, td, tfoot, th, thead, tr - charset - a, script - checked - input - cite - blockquote, q, del, ins - classid - object - clear - br^ - code - applet - codebase - object, applet - codetype - object - color - font - cols - textarea - colspan - td, th - compact - dir, dl^, menu, ol^, ul^ - coords - area, a - data - object - datetime - del, ins - declare - object - defer - script - dir - bdo - disabled - button, input, optgroup, option, select, textarea - enctype - form - face - font - flashvars* - embed - for - label - frame - table - frameborder - iframe - headers - td, th - height - embed, iframe, td^, th^, img, object, applet - href - a, area - hreflang - a - hspace - applet, img^, object^ - ismap - img, input - label - option, optgroup - language - script^ - longdesc - img, iframe - marginheight - iframe - marginwidth - iframe - maxlength - input - method - form - model* - embed - multiple - select - name - button, embed, textarea, applet^, select, form^, iframe^, img^, a^, input, object, 
map^, param - nohref - area - noshade - hr^ - nowrap - td^, th^ - object - applet - onblur - a, area, button, input, label, select, textarea - onchange - input, select, textarea - onfocus - a, area, button, input, label, select, textarea - onreset - form - onselect - input, textarea - onsubmit - form - pluginspage* - embed - pluginurl* - embed - prompt - isindex - readonly - textarea, input - rel - a - rev - a - rows - textarea - rowspan - td, th - rules - table - scope - td, th - scrolling - iframe - selected - option - shape - area, a - size - hr^, font, input, select - span - col, colgroup - src - embed, script, input, iframe, img - standby - object - start - ol^ - summary - table - tabindex - a, area, button, input, object, select, textarea - target - a^, area, form - type - a, embed, object, param, script, input, li^, ol^, ul^, button - usemap - img, input, object - valign - col, colgroup, tbody, td, tfoot, th, thead, tr - value - input, option, param, button, li^ - valuetype - param - vspace - applet, img^, object^ - width - embed, hr^, iframe, img, object, table, td^, th^, applet, col, colgroup, pre^ - wmode - embed - xml:space - pre, script, style - - These are allowed in all but the shown elements: - - class - param, script - dir - applet, bdo, br, iframe, param, script - id - script - lang - applet, br, iframe, param, script - onclick - applet, bdo, br, font, iframe, isindex, param, script - ondblclick - applet, bdo, br, font, iframe, isindex, param, script - onkeydown - applet, bdo, br, font, iframe, isindex, param, script - onkeypress - applet, bdo, br, font, iframe, isindex, param, script - onkeyup - applet, bdo, br, font, iframe, isindex, param, script - onmousedown - applet, bdo, br, font, iframe, isindex, param, script - onmousemove - applet, bdo, br, font, iframe, isindex, param, script - onmouseout - applet, bdo, br, font, iframe, isindex, param, script - onmouseover - applet, bdo, br, font, iframe, isindex, param, script - onmouseup - applet, 
bdo, br, font, iframe, isindex, param, script - style - param, script - title - param, script - xml:lang - applet, br, iframe, param, script - - --- 5.3 CSS 2.1 properties accepting URLs ------------------------o - - - background - background-image - content - cue-after - cue-before - cursor - list-style - list-style-image - play-during - - --- 5.4 Microsoft Windows 1252 character replacements --------------o - - - Key: 'd' double, 'l' left, 'q' quote, 'r' right, 's.' single - - Code-point (decimal) - hexadecimal value - replacement entity - represented character - - 127 - 7f - (removed) - (not used) - 128 - 80 - € - euro - 129 - 81 - (removed) - (not used) - 130 - 82 - ‚ - baseline s. q - 131 - 83 - ƒ - florin - 132 - 84 - „ - baseline d q - 133 - 85 - … - ellipsis - 134 - 86 - † - dagger - 135 - 87 - ‡ - d dagger - 136 - 88 - ˆ - circumflex accent - 137 - 89 - ‰ - permile - 138 - 8a - Š - S Hacek - 139 - 8b - ‹ - l s. guillemet - 140 - 8c - Œ - OE ligature - 141 - 8d - (removed) - (not used) - 142 - 8e - Ž - Z dieresis - 143 - 8f - (removed) - (not used) - 144 - 90 - (removed) - (not used) - 145 - 91 - ‘ - l s. q - 146 - 92 - ’ - r s. q - 147 - 93 - “ - l d q - 148 - 94 - ” - r d q - 149 - 95 - • - bullet - 150 - 96 - – - en dash - 151 - 97 - — - em dash - 152 - 98 - ˜ - tilde accent - 153 - 99 - ™ - trademark - 154 - 9a - š - s Hacek - 155 - 9b - › - r s. guillemet - 156 - 9c - œ - oe ligature - 157 - 9d - (removed) - (not used) - 158 - 9e - ž - z dieresis - 159 - 9f - Ÿ - Y dieresis - - --- 5.5 URL format -------------------------------------------------o - - - An `absolute` URL has a 'protocol' or 'scheme', a 'network location' or 'hostname', and, optional 'path', 'parameters', 'query' and 'fragment' segments. Thus, an absolute URL has this generic structure: - - (scheme) : (//network location) /(path) ;(parameters) ?(query) #(fragment) - - The schemes can only contain letters, digits, '+', '.' and '-'. 
Hostname is the portion after the '//' and up to the first '/' (if any; else, up to the end) when ':' is followed by a '//' (e.g., 'abc.com' in 'ftp://abc.com/def'); otherwise, it consists of everything after the ':' (e.g., 'def@abc.com' in mailto:def@abc.com'). - - `Relative` URLs do not have explicit schemes and network locations; such values are inherited from a `base` URL. - - --- 5.6 Brief on htmLawed code -------------------------------------o - - - Much of the code's logic and reasoning can be understood from the documentation above. - - The *output* of htmLawed is a text string containing the processed input. There is no custom error tracking. - - *Function arguments* for htmLawed are: - - * '$in' - 1st argument; a text string; the *input text* to be processed. Any extraneous slashes added by PHP when `magic quotes` are enabled should be removed beforehand using PHP's 'stripslashes()' function. - - * '$config' - 2nd argument; an associative array; optional (named '$C' in htmLawed code). The array has keys with names like 'balance' and 'keep_bad', and the values, which can be boolean, string, or array, depending on the key, are read to accordingly set the *configurable parameters* (indicated by the keys). All configurable parameters receive some default value if the value to be used is not specified by the user through '$config'. `Finalized` '$config' is thus a filtered and possibly larger array. - - * '$spec' - 3rd argument; a text string; optional. The string has rules, written in an htmLawed-designated format, *specifying* element-specific attribute and attribute value restrictions. Function 'hl_spec()' is used to convert the string to an associative-array for internal use. `Finalized` '$spec' is thus an array. - - `Finalized` '$config' and '$spec' are made *global variables* while htmLawed is at work. 
Values of any pre-existing global variables with same names are noted, and their values are restored after htmLawed finishes processing the input (to capture the `finalized` values, the 'show_settings' parameter of '$config' should be used). Depending on '$config', another global variable 'hl_Ids', to track 'id' attribute values for uniqueness, may be set. Unlike the other two variables, this one is not reset (or unset) post-processing. - - Except for the main function 'htmLawed()' and the functions 'kses()' and 'kses_hook()', htmLawed's functions are *name-spaced* using the 'hl_' prefix. The *functions* and their roles are: - - * 'hl_attrval' - checking attribute values against $spec - * 'hl_bal' - tag balancing - * 'hl_cmtcd' - handling CDATA sections and HTML comments - * 'hl_ent' - entity handling - * 'hl_prot' - checking a URL scheme/protocol - * 'hl_regex' - checking syntax of a regular expression - * 'hl_spec' - converting user-supplied $spec value to one used by htmLawed internally - * 'hl_tag' - handling tags - * 'hl_tag2' - transforming tags - * 'hl_tidy' - compact/beautify HTML - * 'hl_version' - reporting htmLawed version - * 'htmLawed' - main function - * 'kses' - main function of 'kses' - * 'kses_hook' - hook function of 'kses' - - The last two are for compatibility with pre-existing code using the 'kses' script. htmLawed's 'kses()' basically passes on the filtering task to 'htmLawed()' function after deciphering '$config' and '$spec' from the argument values supplied to it. 'kses_hook()' is an empty function and is meant for being filled with custom code if the 'kses' script users were using one. - - 'htmLawed()' finalizes '$spec' (with the help of 'hl_spec()') and '$config', and globalizes them. Finalization of '$config' involves setting default values if an inappropriate or invalid one is supplied. This includes calling 'hl_regex()' to check well-formedness of regular expression patterns if such expressions are user-supplied through '$config'. 
'htmLawed()' then removes invalid characters like nulls and 'x01' and appropriately handles entities using 'hl_ent()'. HTML comments and CDATA sections are identified and treated as per '$config' with the help of 'hl_cmtcd()'. When retained, the '<' and '>' characters identifying them, and the '<', '>' and '&' characters inside them, are replaced with control characters (code-points '1' to '5') till any tag balancing is completed. - - After this `initial processing` 'htmLawed()' identifies tags using regex and processes them with the help of 'hl_tag()' -- a large function that analyzes tag content, filtering it as per HTML standards, '$config' and '$spec'. Among other things, 'hl_tag()' transforms deprecated elements using 'hl_tag2()', removes attributes from closing tags, checks attribute values as per '$spec' rules using 'hl_attrval()', and checks URL protocols using 'hl_prot()'. 'htmLawed()' performs tag balancing and nesting checks with a call to 'hl_bal()', and optionally compacts/beautifies the output with proper white-spacing with a call to 'hl_tidy()'. The latter temporarily replaces white-space, and '<', '>' and '&' characters inside 'pre', 'script' and 'textarea' elements, and HTML comments and CDATA sections with control characters (code-points '1' to '5', and '7'). - - htmLawed permits the use of custom code or *hook functions* at two stages. The first, called inside 'htmLawed()', allows the input text as well as the finalized $config and $spec values to be altered right after the initial processing (see section:- #3.7). The second is called by 'hl_tag()' once the tag content is finalized (see section:- #3.4.9). - - Being dictated by the external and stable HTML standard, htmLawed's objective is very clear-cut and less concerned with tweakability. The code is only minimally annotated with comments -- it is not meant to instruct; PHP developers familiar with the HTML specs will see the logic, and others can always refer to the htmLawed documentation. 
The compact structuring of the statements is meant to aid in quickly grasping the logic, at least when viewed with code syntax highlighted. - -___________________________________________________________________oo - - -@@description: htmLawed PHP software is a free, open-source, customizable HTML input purifier and filter -@@encoding: utf-8 -@@keywords: htmLawed, HTM, HTML, HTML Tidy, converter, filter, formatter, purifier, sanitizer, XSS, input, PHP, software, code, script, security, cross-site scripting, hack, sanitize, remove, standards, tags, attributes, elements -@@language: en +/* +htmLawed_README.txt, 29 August 2013 +htmLawed 1.1.16, 29 August 2013 +Copyright Santosh Patnaik +Dual licensed with LGPL 3 and GPL 2+ +A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed +*/ + + +== Content ========================================================== + + +1 About htmLawed + 1.1 Example uses + 1.2 Features + 1.3 History + 1.4 License & copyright + 1.5 Terms used here +2 Usage + 2.1 Simple + 2.2 Configuring htmLawed using the '$config' parameter + 2.3 Extra HTML specifications using the '$spec' parameter + 2.4 Performance time & memory usage + 2.5 Some security risks to keep in mind + 2.6 Use without modifying old 'kses()' code + 2.7 Tolerance for ill-written HTML + 2.8 Limitations & work-arounds + 2.9 Examples of usage +3 Details + 3.1 Invalid/dangerous characters + 3.2 Character references/entities + 3.3 HTML elements + 3.3.1 HTML comments and 'CDATA' sections + 3.3.2 Tag-transformation for better XHTML-Strict + 3.3.3 Tag balancing and proper nesting + 3.3.4 Elements requiring child elements + 3.3.5 Beautify or compact HTML + 3.4 Attributes + 3.4.1 Auto-addition of XHTML-required attributes + 3.4.2 Duplicate/invalid 'id' values + 3.4.3 URL schemes (protocols) and scripts in attribute values + 3.4.4 Absolute & relative URLs + 3.4.5 Lower-cased, standard attribute values + 3.4.6 Transformation of deprecated attributes + 
3.4.7 Anti-spam & 'href' + 3.4.8 Inline style properties + 3.4.9 Hook function for tag content + 3.5 Simple configuration directive for most valid XHTML + 3.6 Simple configuration directive for most `safe` HTML + 3.7 Using a hook function + 3.8 Obtaining `finalized` parameter values + 3.9 Retaining non-HTML tags in input with mixed markup +4 Other + 4.1 Support + 4.2 Known issues + 4.3 Change-log + 4.4 Testing + 4.5 Upgrade, & old versions + 4.6 Comparison with 'HTMLPurifier' + 4.7 Use through application plug-ins/modules + 4.8 Use in non-PHP applications + 4.9 Donate + 4.10 Acknowledgements +5 Appendices + 5.1 Characters discouraged in HTML + 5.2 Valid attribute-element combinations + 5.3 CSS 2.1 properties accepting URLs + 5.4 Microsoft Windows 1252 character replacements + 5.5 URL format + 5.6 Brief on htmLawed code + + +== 1 About htmLawed ================================================ + + + htmLawed is a PHP script to process text with HTML markup to make it more compliant with HTML standards and administrative policies. It works by making HTML well-formed with balanced and properly nested tags, neutralizing code that may be used for cross-site scripting (XSS) attacks, allowing only specified HTML tags and attributes, and so on. Such `lawing in` of HTML in text used in (X)HTML or XML documents ensures that it is in accordance with the aesthetics, safety and usability requirements set by administrators. + + htmLawed is highly customizable, and fast with low memory usage. Its free and open-source code is in one small file, does not require extensions or libraries, and works in older versions of PHP as well. It is a good alternative to the HTML Tidy:- http://tidy.sourceforge.net application. 
+ + +-- 1.1 Example uses ------------------------------------------------ + + + * Filtering of text submitted as comments on blogs to allow only certain HTML elements + + * Making RSS/Atom newsfeed item-content standard-compliant: often one uses an excerpt from an HTML document for the content, and with unbalanced tags, non-numerical entities, etc., such excerpts may not be XML-compliant + + * Text processing for stricter XML standard-compliance: e.g., to have lowercased 'x' in hexadecimal numeric entities becomes necessary if an XHTML document with MathML content needs to be served as 'application/xml' + + * Scraping text or data from web-pages + + * Pretty-printing HTML code + + +-- 1.2 Features ---------------------------------------------------o + + + Key: '*' security feature, '^' standard compliance, '~' requires setting right options, '`' different from 'Kses' + + * make input more *secure* and *standard-compliant* + * use for HTML 4, XHTML 1.0 or 1.1, or even generic *XML* documents ^~` + + * *beautify* or *compact* HTML ^~` + + * can *restrict elements* ^~` + * ensures proper closure of empty elements like 'img' ^` + * *transform deprecated elements* like 'u' ^~` + * HTML *comments* and 'CDATA' sections can be permitted ^~` + * elements like 'script', 'object' and 'form' can be permitted ~ + + * *restrict attributes*, including *element-specifically* ^~` + * remove *invalid attributes* ^` + * element and attribute names are *lower-cased* ^ + * provide *required attributes*, like 'alt' for 'image' ^` + * *transforms deprecated attributes* ^~` + * attributes *declared only once* ^` + + * *restrict attribute values*, including *element-specifically* ^~` + * a value is declared for `empty` (`minimized`) attributes like 'checked' ^ + * check for potentially dangerous attribute values *~ + * ensure *unique* 'id' attribute values ^~` + * *double-quote* attribute values ^ + * lower-case *standard attribute values* like 'password' ^` + * permit custom, non-standard 
attributes as well as custom rules for standard attributes ~` + + * *attribute-specific URL protocol/scheme restriction* *~` + * disable *dynamic expressions* in 'style' values *~` + + * neutralize invalid named character entities ^` + * *convert* hexadecimal numeric entities to decimal ones, or vice versa ^~` + * convert named entities to numeric ones for generic XML use ^~` + + * remove *null* characters * + * neutralize potentially dangerous proprietary Netscape *Javascript entities* * + * replace potentially dangerous *soft-hyphen* character in URL-accepting attribute values with spaces * + + * remove common *invalid characters* not allowed in HTML or XML ^` + * replace *characters from Microsoft applications* like 'Word' that are discouraged in HTML or XML ^~` + * neutralize entities for characters invalid or discouraged in HTML or XML ^` + * appropriately neutralize '<', '&', '"', and '>' characters ^*` + + * understands improperly spaced tag content (like, spread over more than a line) and properly spaces them ` + * attempts to *balance tags* for well-formedness ^~` + * understands when *omitable closing tags* like '</p>' (allowed in HTML 4, transitional, e.g.) are missing ^~` + * attempts to permit only *validly nested tags* ^~` + * option to *remove or neutralize bad content* ^~` + * attempts to *rectify common errors of plain-text misplacement* (e.g., directly inside 'blockquote') ^~` + + * fast, *non-OOP* code of ~45 kb incurring peak basal memory usage of ~0.5 MB + * *compatible* with pre-existing code using 'Kses' (the filter used by 'WordPress') + + * optional *anti-spam* measures such as addition of 'rel="nofollow"' and link-disabling ~` + * optionally makes *relative URLs absolute*, and vice versa ~` + + * optionally mark '&' to identify the entities for '&', '<' and '>' introduced by htmLawed ~` + + * allows deployment of powerful *hook functions* to *inject* HTML, *consolidate* 'style' attributes to 'class', finely check attribute values, etc. ~` + + * *independent of character encoding* of input and does not affect it + + * *tolerance for ill-written HTML* to a certain degree + + +-- 1.3 History ----------------------------------------------------o + + + htmLawed was created in 2007 for use with 'LabWiki', a wiki software developed at PHP Labware, as a suitable software could not be found. Existing PHP software like 'Kses' and 'HTMLPurifier' were deemed inadequate, slow, resource-intensive, or dependent on an extension or external application like 'HTML Tidy'. The core logic of htmLawed, that of identifying HTML elements and attributes, was based on the 'Kses' (version 0.2.2) HTML filter software of Ulf Harnhammar (it can still be used with code that uses 'Kses'; see section:- #2.6.). + + See section:- #4.3 for a detailed log of changes in htmLawed over the years, and section:- #4.10 for acknowledgements.
+ + +-- 1.4 License & copyright ----------------------------------------o + + + htmLawed is free and open-source software dual copyrighted by Santosh Patnaik, MD, PhD, and licensed under LGPL license version 3:- http://www.gnu.org/licenses/lgpl-3.0.txt, and GPL license version 2:- http://www.gnu.org/licenses/gpl-2.0.txt (or later). + + +-- 1.5 Terms used here --------------------------------------------o + + + In this document, only HTML body-level elements are considered. htmLawed does not have support for head-level elements, 'body', and the frame-level elements, 'frameset', 'frame' and 'noframes', and these elements are ignored here. + + * `administrator` - or admin; person setting up the code that utilizes htmLawed; also, `user` + * `attributes` - name-value pairs like 'href="http://x.com"' in opening tags + * `author` - see `writer` + * `character` - atomic unit of text; internally represented by a numeric `code-point` as specified by the `encoding` or `charset` in use + * `entity` - markup like '&gt;' and '&#160;' used to refer to a character + * `element` - HTML element like 'a' and 'img' + * `element content` - content between the opening and closing tags of an element, like 'click' of the '<a href="x">click</a>' element + * `HTML` - implies XHTML unless specified otherwise + * `HTML body` - Complete HTML documents typically have a `head` and a `body` container.
Information in `head` specifies title of the document, etc., whereas that in the body informs what is to be displayed on a web-page; it is only the elements for `body`, except 'frame', 'frameset' and 'noframes' that htmLawed is concerned with + * `input` - text given to htmLawed to process + * `processing` - involves filtering, correction, etc., of input + * `safe` - absence or reduction of certain characters and HTML elements and attributes in HTML of text that can otherwise potentially, and circumstantially, expose text readers to security vulnerabilities like cross-site scripting attacks (XSS) + * `scheme` - a URL protocol like 'http' and 'ftp' + * `specifications` - standard specifications, for HTML4, HTML5, Ruby, etc. + * `style property` - terms like 'border' and 'height' for which declarations are made in values for the 'style' attribute of elements + * `tag` - markers like '<a href="x">' and '</a>' delineating element content; the opening tag can contain attributes + * `tag content` - consists of tag markers '<' and '>', element names like 'div', and possibly attributes + * `user` - administrator + * `writer` - end-user like a blog commenter providing the input that is to be processed; also, `author` + + +-- 1.6 Availability ------------------------------------------------o + + + htmLawed can be downloaded for free at its website:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed. Besides the 'htmLawed.php' file, the download has the htmLawed documentation (this document) in plain text:- htmLawed_README.txt and HTML:- htmLawed_README.htm formats, a script for testing:- htmLawedTest.php, and a text file for test-cases:- htmLawed_TESTCASE.txt. htmLawed is also available as a PHP class (OOP code) on its website. + + +== 2 Usage ========================================================oo + + + htmLawed works in PHP version 4.4 or higher. Either 'include()' the 'htmLawed.php' file, or copy-paste the entire code.
To use with PHP 4.3, have the following code included: + + if(!function_exists('ctype_digit')){ + function ctype_digit($var){ + return ((int) $var == $var); + } + } + + +-- 2.1 Simple ------------------------------------------------------ + + + The input text to be processed, '$text', is passed as an argument of type string; 'htmLawed()' returns the processed string: + + $processed = htmLawed($text); + + With the 'htmLawed class' (section:- #1.6), usage is: + + $processed = htmLawed::hl($text); + + *Notes*: (1) If input is from a '$_GET' or '$_POST' value, and 'magic quotes' are enabled on the PHP setup, run 'stripslashes()' on the input before passing to htmLawed. (2) htmLawed does not have support for head-level elements, 'body', and the frame-level elements, 'frameset', 'frame' and 'noframes'. + + By default, htmLawed will process the text allowing all valid HTML elements/tags, secure URL scheme/CSS style properties, etc. It will allow 'CDATA' sections and HTML comments, balance tags, and ensure proper nesting of elements. Such actions can be configured using two other optional arguments -- '$config' and '$spec': + + $processed = htmLawed($text, $config, $spec); + + The '$config' and '$spec' arguments are detailed below. Some examples are shown in section:- #2.9. For maximum protection against 'XSS' and other scripting attacks (e.g., by disallowing Javascript code), consider using the 'safe' parameter; see section:- #3.6. + + +-- 2.2 Configuring htmLawed using the '$config' parameter ---------o + + + '$config' instructs htmLawed on how to tackle certain tasks. When '$config' is not specified, or not set as an array (e.g., '$config = 1'), htmLawed will take default actions. One or many of the task-action or value-specification pairs can be specified in '$config' as array key-value pairs. If a parameter is not specified, htmLawed will use the default value/action indicated further below. 
+ + $config = array('comment'=>0, 'cdata'=>1); + $processed = htmLawed($text, $config); + + Or, + + $processed = htmLawed($text, array('comment'=>0, 'cdata'=>1)); + + Below are the possible value-specification combinations. In PHP code, values that are integers should not be quoted and should be used as numeric types (unless meant as string/text). + + Key: '*' default, '^' different default when htmLawed is used in the Kses-compatible mode (see section:- #2.6), '~' different default when 'valid_xhtml' is set to '1' (see section:- #3.5), '"' different default when 'safe' is set to '1' (see section:- #3.6) + + *abs_url* + Make URLs absolute or relative; '$config["base_url"]' needs to be set; see section:- #3.4.4 + + '-1' - make relative + '0' - no action * + '1' - make absolute + + *and_mark* + Mark '&' characters in the original input; see section:- #3.2 + + *anti_link_spam* + Anti-link-spam measure; see section:- #3.4.7 + + '0' - no measure taken * + `array("regex1", "regex2")` - will ensure a 'rel' attribute with 'nofollow' in its value in case the 'href' attribute value matches the regular expression pattern 'regex1', and/or will remove 'href' if its value matches the regular expression pattern 'regex2'. E.g., 'array("/./", "/://\W*(?!(abc\.com|xyz\.org))/")'; see section:- #3.4.7 for more. 
+ + *anti_mail_spam* + Anti-mail-spam measure; see section:- #3.4.7 + + '0' - no measure taken * + `word` - '@' in mail address in 'href' attribute value is replaced with specified `word` + + *balance* + Balance tags for well-formedness and proper nesting; see section:- #3.3.3 + + '0' - no + '1' - yes * + + *base_url* + Base URL value that needs to be set if '$config["abs_url"]' is not '0'; see section:- #3.4.4 + + *cdata* + Handling of 'CDATA' sections; see section:- #3.3.1 + + '0' - don't consider 'CDATA' sections as markup and proceed as if plain text ^" + '1' - remove + '2' - allow, but neutralize any '<', '>', and '&' inside by converting them to named entities + '3' - allow * + + *clean_ms_char* + Replace discouraged characters introduced by Microsoft Word, etc.; see section:- #3.1 + + '0' - no * + '1' - yes + '2' - yes, but replace special single & double quotes with ordinary ones + + *comment* + Handling of HTML comments; see section:- #3.3.1 + + '0' - don't consider comments as markup and proceed as if plain text ^" + '1' - remove + '2' - allow, but neutralize any '<', '>', and '&' inside by converting to named entities + '3' - allow * + + *css_expression* + Allow dynamic CSS expression by not removing the expression from CSS property values in 'style' attributes; see section:- #3.4.8 + + '0' - remove * + '1' - allow + + *deny_attribute* + Denied HTML attributes; see section:- #3.4 + + '0' - none * + `string` - dictated by values in `string` + 'on*' (like 'onfocus') attributes not allowed - " + + *direct_nest_list* + Allow direct nesting of a list within another without requiring it to be a list item; see section:- #3.3.4 + + '0' - no * + '1' - yes + + *elements* + Allowed HTML elements; see section:- #3.3 + + '* -center -dir -font -isindex -menu -s -strike -u' - ~ + 'applet, embed, iframe, object, script' not allowed - " + + *hexdec_entity* + Allow hexadecimal numeric entities and do not convert to the more widely accepted decimal ones, or convert decimal 
to hexadecimal ones; see section:- #3.2 + + '0' - no + '1' - yes * + '2' - convert decimal to hexadecimal ones + + *hook* + Name of an optional hook function to alter the input string, '$config' or '$spec' before htmLawed starts its main work; see section:- #3.7 + + '0' - no hook function * + `name` - `name` is name of the hook function ('kses_hook' ^) + + *hook_tag* + Name of an optional hook function to alter tag content finalized by htmLawed; see section:- #3.4.9 + + '0' - no hook function * + `name` - `name` is name of the hook function + + *keep_bad* + Neutralize bad tags by converting '<' and '>' to entities, or remove them; see section:- #3.3.3 + + '0' - remove ^ + '1' - neutralize both tags and element content + '2' - remove tags but neutralize element content + '3' and '4' - like '1' and '2' but remove if text ('pcdata') is invalid in parent element + '5' and '6' * - like '3' and '4' but line-breaks, tabs and spaces are left + + *lc_std_val* + For XHTML compliance, predefined, standard attribute values, like 'get' for the 'method' attribute of 'form', must be lowercased; see section:- #3.4.5 + + '0' - no + '1' - yes * + + *make_tag_strict* + Transform/remove these non-strict XHTML elements, even if they are allowed by the admin: 'applet' 'center' 'dir' 'embed' 'font' 'isindex' 'menu' 's' 'strike' 'u'; see section:- #3.3.2 + + '0' - no ^ + '1' - yes, but leave 'applet', 'embed' and 'isindex' elements that currently can't be transformed * + '2' - yes, removing 'applet', 'embed' and 'isindex' elements and their contents (nested elements remain) ~ + + *named_entity* + Allow non-universal named HTML entities, or convert to numeric ones; see section:- #3.2 + + '0' - convert + '1' - allow * + + *no_deprecated_attr* + Allow deprecated attributes or transform them; see section:- #3.4.6 + + '0' - allow ^ + '1' - transform, but 'name' attributes for 'a' and 'map' are retained * + '2' - transform + + *parent* + Name of the parent element, possibly imagined, that will 
hold the input; see section:- #3.3 + + *safe* + Magic parameter to make input the most secure against XSS without needing to specify other relevant '$config' parameters; see section:- #3.6 + + '0' - no * + '1' - will auto-adjust other relevant '$config' parameters (indicated by '"' in this list) + + *schemes* + Array of attribute-specific, comma-separated, lower-cased list of schemes (protocols) allowed in attributes accepting URLs (or '!' to `deny` any URL); '*' covers all unspecified attributes; see section:- #3.4.3 + + 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https' * + '*: ftp, gopher, http, https, mailto, news, nntp, telnet' ^ + 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; style: !; *:file, http, https' " + + *show_setting* + Name of a PHP variable to assign the `finalized` '$config' and '$spec' values; see section:- #3.8 + + *style_pass* + Do not look at 'style' attribute values, letting them through without any alteration + + '0' - no * + '1' - htmLawed will let through any 'style' value; see section:- #3.4.8 + + *tidy* + Beautify or compact HTML code; see section:- #3.3.5 + + '-1' - compact + '0' - no * + '1' or 'string' - beautify (custom format specified by 'string') + + *unique_ids* + 'id' attribute value checks; see section:- #3.4.2 + + '0' - no ^ + '1' - remove duplicate and/or invalid ones * + `word` - remove invalid ones and replace duplicate ones with new and unique ones based on the `word`; the admin-specified `word`, like 'my_', should begin with a letter (a-z) and can contain letters, digits, '.', '_', '-', and ':'. 
+ + *valid_xhtml* + Magic parameter to make input the most valid XHTML without needing to specify other relevant '$config' parameters; see section:- #3.5 + + '0' - no * + '1' - will auto-adjust other relevant '$config' parameters (indicated by '~' in this list) + + *xml:lang* + Auto-adding 'xml:lang' attribute; see section:- #3.4.1 + + '0' - no * + '1' - add if 'lang' attribute is present + '2' - add if 'lang' attribute is present, and remove 'lang' ~ + + +-- 2.3 Extra HTML specifications using the $spec parameter --------o + + + The '$spec' argument of htmLawed can be used to disallow an otherwise legal attribute for an element, or to restrict the attribute's values. This can also be helpful as a security measure (e.g., in certain versions of browsers, certain values can cause buffer overflows and denial of service attacks), or in enforcing admin policies. '$spec' is specified as a string of text containing one or more `rules`, with multiple rules separated from each other by a semi-colon (';'). E.g., + + $spec = 'i=-*; td, tr=style, id, -*; a=id(match="/[a-z][a-z\d.:\-`"]*/i"/minval=2), href(maxlen=100/minlen=34); img=-width,-alt'; + $processed = htmLawed($text, $config, $spec); + + Or, + + $processed = htmLawed($text, $config, 'i=-*; td, tr=style, id, -*; a=id(match="/[a-z][a-z\d.:\-`"]*/i"/minval=2), href(maxlen=100/minlen=34); img=-width,-alt'); + + A rule begins with an HTML *element* name(s) (`rule-element`), for which the rule applies, followed by an equal ('=') sign. A rule-element may represent multiple elements if comma (,)-separated element names are used. E.g., 'th,td,tr='. + + Rest of the rule consists of comma-separated HTML *attribute names*. A minus ('-') character before an attribute means that the attribute is not permitted inside the rule-element. E.g., '-width'. To deny all attributes, '-*' can be used. 
+ + Following shows examples of rule excerpts with rule-element 'a' and the attributes that are being permitted: + + * 'a=' - all + * 'a=id' - all + * 'a=href, title, -id, -onclick' - all except 'id' and 'onclick' + * 'a=*, id, -id' - all except 'id' + * 'a=-*' - none + * 'a=-*, href, title' - none except 'href' and 'title' + * 'a=-*, -id, href, title' - none except 'href' and 'title' + + Rules regarding *attribute values* are optionally specified inside round brackets after attribute names in slash ('/')-separated `parameter = value` pairs. E.g., 'title(maxlen=30/minlen=5)'. None or one or more of the following parameters may be specified: + + * 'oneof' - one or more choices separated by '|' that the value should match; if only one choice is provided, then the value must match that choice + + * 'noneof' - one or more choices separated by '|' that the value should not match + + * 'maxlen' and 'minlen' - upper and lower limits for the number of characters in the attribute value; specified in numbers + + * 'maxval' and 'minval' - upper and lower limits for the numerical value specified in the attribute value; specified in numbers + + * 'match' and 'nomatch' - pattern that the attribute value should or should not match; specified as PHP/PCRE-compatible regular expressions with delimiters and possibly modifiers + + * 'default' - a value to force on the attribute if the value provided by the writer does not fit any of the specified parameters + + If 'default' is not set and the attribute value does not satisfy any of the specified parameters, then the attribute is removed. The 'default' value can also be used to force all attribute declarations to take the same value (by getting the values declared illegal by setting, e.g., 'maxlen' to '-1'). + + Examples with `input` '' are shown below. 
+ + `Rule`: 'input=title(maxlen=60/minlen=6), value' + `Output`: '' + + `Rule`: 'input=title(), value(maxval=8/default=6)' + `Output`: '' + + `Rule`: 'input=title(nomatch=%w.d%i), value(match=%em%/default=6em)' + `Output`: '' + + `Rule`: 'input=title(oneof=height|depth/default=depth), value(noneof=5|6)' + `Output`: '' + + *Special characters*: The characters ';', ',', '/', '(', ')', '|', '~' and space have special meanings in the rules. Words in the rules that use such characters, or the characters themselves, should be `escaped` by enclosing in pairs of double-quotes ('"'). A back-tick ('`') can be used to escape a literal '"'. An example rule illustrating this is 'input=value(maxlen=30/match="/^\w/"/default="your `"ID`"")'. + + *Note*: To deny an attribute for all elements for which it is legal, '$config["deny_attribute"]' (see section:- #3.4) can be used instead of '$spec'. Also, attributes can be allowed element-specifically through '$spec' while being denied globally through '$config["deny_attribute"]'. The 'hook_tag' parameter (section:- #3.4.9) can also be possibly used to implement a functionality like that achieved using '$spec' functionality. + + '$spec' can also be used to permit custom, non-standard attributes as well as custom rules for standard attributes. Thus, the following value of '$spec' will permit the custom uses of the standard 'rel' attribute in 'input' (not permitted as per standards) and of a non-standard attribute, 'vFlag', in 'img'. + + $spec = 'img=vFlag; input=rel' + + The attribute names can contain alphabets, colons (:) and hyphens (-), but they must start with an alphabet. + + +-- 2.4 Performance time & memory usage ----------------------------o + + + The time and memory consumed during text processing by htmLawed depends on its configuration, the size of the input, and the amount, nestedness and well-formedness of the HTML markup within the input. 
In particular, tag balancing and beautification each can increase the processing time by about a quarter. + + The htmLawed demo:- htmLawedTest.php can be used to evaluate the performance and effects of different types of input and '$config'. + + +-- 2.5 Some security risks to keep in mind ------------------------o + + + When setting the parameters/arguments (like those to allow certain HTML elements) for use with htmLawed, one should bear in mind that the setting may let through potentially `dangerous` HTML code which is meant to steal user-data, deface a website, render a page non-functional, etc. Unless end-users, either people or software, supplying the content are completely trusted, security issues arising from the degree of HTML usage permitted through htmLawed's setting should be considered. For example, the following increase security risks: + + * Allowing 'script', 'applet', 'embed', 'iframe' or 'object' elements, or certain of their attributes like 'allowscriptaccess' + + * Allowing HTML comments (some Internet Explorer versions are vulnerable with, e.g., '<!--[if gte IE 4]><script>alert("xss");</script><![endif]-->') + + * Allowing dynamic CSS expressions (some Internet Explorer versions are vulnerable) + + * Allowing the 'style' attribute + + To remove `unsecure` HTML, code-developers using htmLawed must set '$config' appropriately. E.g., '$config["elements"] = "* -script"' to deny the 'script' element (section:- #3.3), '$config["safe"] = 1' to auto-configure certain htmLawed parameters for maximizing security (section:- #3.6), etc. + + Permitting the '*style*' attribute brings in risks of `click-jacking`, `phishing`, web-page overlays, etc., `even` when the 'safe' parameter is enabled (see section:- #3.6). Except for URLs and a few other things like CSS dynamic expressions, htmLawed currently does not check every CSS style property. 
It does provide ways for the code-developer implementing htmLawed to do such checks through htmLawed's '$spec' argument, and through the 'hook_tag' parameter (see section:- #3.4.8 for more). Disallowing 'style' completely and relying on CSS classes and stylesheet files is recommended. + + htmLawed does not check or correct the character *encoding* of the input it receives. In conjunction with permissive circumstances, such as when the character encoding is left undefined through HTTP headers or HTML 'meta' tags, this can allow for an exploit (like Google's `UTF-7/XSS` vulnerability of the past). + + +-- 2.6 Use without modifying old 'kses()' code --------------------o + + + The 'Kses' PHP script is used by many applications (like 'WordPress'). It is possible to have such applications use htmLawed instead, since it is compatible with code that calls the 'kses()' function declared in the 'Kses' file (usually named 'kses.php'). E.g., application code like this will continue to work after replacing 'Kses' with htmLawed: + + $comment_filtered = kses($comment_input, array('a'=>array(), 'b'=>array(), 'i'=>array())); + + For some of the '$config' parameters, htmLawed will use values other than the default ones. These are indicated by '^' in section:- #2.2. To force htmLawed to use other values, function 'kses()' in the htmLawed code should be edited -- a few configurable parameters/variables need to be changed. + + If the application uses a 'Kses' file that has the 'kses()' function declared, then, to have the application use htmLawed instead of 'Kses', simply rename 'htmLawed.php' (to 'kses.php', e.g.) and replace the 'Kses' file (or just replace the code in the 'Kses' file with the htmLawed code). If the 'kses()' function in the 'Kses' file had been renamed by the application developer (e.g., in 'WordPress', it is named 'wp_kses()'), then appropriately rename the 'kses()' function in the htmLawed code. 
+ + If the 'Kses' file used by the application has been highly altered by the application developers, then one may need a different approach. E.g., with 'WordPress', it is best to copy the htmLawed code to 'wp_includes/kses.php', rename the newly added function 'kses()' to 'wp_kses()', and delete the code for the original 'wp_kses()' function. + + If the 'Kses' code has a non-empty hook function (e.g., 'wp_kses_hook()' in case of 'WordPress'), then the code for htmLawed's 'kses_hook()' function should be appropriately edited. However, the requirement of the hook function should be re-evaluated considering that htmLawed has extra capabilities. With 'WordPress', the hook function is an essential one. The following code is suggested for the htmLawed 'kses_hook()' in case of 'WordPress': + + function kses_hook($string, &$cf, &$spec){ + // kses compatibility + $allowed_html = $spec; + $allowed_protocols = array(); + foreach($cf['schemes'] as $v){ + foreach($v as $k2=>$v2){ + if(!in_array($k2, $allowed_protocols)){ + $allowed_protocols[] = $k2; + } + } + } + return wp_kses_hook($string, $allowed_html, $allowed_protocols); + // eof + } + + +-- 2.7 Tolerance for ill-written HTML -----------------------------o + + + htmLawed can work with ill-written HTML code in the input. However, HTML that is too ill-written may not be `read` as HTML, and may therefore get identified as mere plain text. Following statements indicate the degree of `looseness` that htmLawed can work with, and can be provided in instructions to writers: + + * Tags must be flanked by '<' and '>' with no '>' inside -- any needed '>' should be put in as '>'. It is possible for tag content (element name and attributes) to be spread over many lines instead of being on one. A space may be present between the tag content and '>', like '
    ' and '', but not after the '<'. + + * Element and attribute names need not be lower-cased. + + * Attribute string of elements may be liberally spaced with tabs, line-breaks, etc. + + * Attribute values may be single- and not double-quoted. + + * Left-padding of numeric entities (like, '&#0160;', '&#x07ff;') with '0' is okay as long as the number of characters between the '&' and the ';' does not exceed 8. All entities must end with ';' though. + + * Named character entities must be properly cased. Thus, '&Lt;' or '&TILDE;' will not be recognized as entities and will be `neutralized`. + + * HTML comments should not be inside element tags (they can be between tags), and should begin with '<!--' and end with '-->'. Characters like '<', '>', and '&' may be allowed inside depending on '$config', but any '-->' inside should be put in as '--&gt;'. Any '--' inside will be automatically converted to '-', and a space will be added before the comment delimiter '-->'. + + * 'CDATA' sections should not be inside element tags, and can be in element content only if plain text is allowed for that element. They should begin with '<![CDATA[' and end with ']]>'. Characters like '<', '>', and '&' may be allowed inside depending on '$config', but any ']]>' inside should be put in as ']]&gt;'. + + * For attribute values, character entities '&lt;', '&gt;' and '&amp;' should be used instead of characters '<' and '>', and '&' (when '&' is not part of a character entity). This applies even for Javascript code in values of attributes like 'onclick'. + + * Characters '<', '>', '&' and '"' that are part of actual Javascript, etc., code in 'script' elements should be used as such and not be put in as entities like '&gt;'. Otherwise, though the HTML will be valid, the code may fail to work. Further, if such characters have to be used, then they should be put inside 'CDATA' sections. 
+ + * Simple instructions like "an opening tag cannot be present between two closing tags" and "nested elements should be closed in the reverse order of how they were opened" can help authors write balanced HTML. If tags are imbalanced, htmLawed will try to balance them, but in the process, depending on '$config["keep_bad"]', some code/text may be lost. + + * Input authors should be notified of admin-specified allowed elements, attributes, configuration values (like conversion of named entities to numeric ones), etc. + + * With '$config["unique_ids"]' not '0' and the 'id' attribute being permitted, writers should carefully avoid using duplicate or invalid 'id' values as even though htmLawed will correct/remove the values, the final output may not be the one desired. E.g., when '<a id="home"></a><input id="home" /><label for="home"></label>' is processed into +'<a id="home"></a><input id="prefix_home" /><label for="home"></label>'. + + * Even if intended HTML is lost from an ill-written input, the processed output will be more secure and standard-compliant. + + * For URLs, unless '$config["schemes"]' is appropriately set, writers should avoid using escape characters or entities in schemes. E.g., '&#104;ttp' (which many browsers will read as the harmless 'http') may be considered bad by htmLawed. + + * htmLawed will attempt to put plain text present directly inside 'blockquote', 'form', 'map' and 'noscript' elements (illegal as per the specifications) inside auto-generated 'div' elements. + + +-- 2.8 Limitations & work-arounds ---------------------------------o + + + htmLawed's main objective is to make the input text `more` standard-compliant, secure for readers, and free of HTML elements and attributes considered undesirable by the administrator. Some of its current limitations, regardless of this objective, are noted below along with work-arounds. + + It should be borne in mind that no browser application is 100% standard-compliant, and that some of the standard specifications (like asking for normalization of white-spacing within 'textarea' elements) are clearly wrong. 
Regarding security, note that `unsafe` HTML code is not legally invalid per se. + + * htmLawed is meant for input that goes into the 'body' of HTML documents. HTML's head-level elements are not supported, nor are the frameset elements 'frameset', 'frame' and 'noframes'. Content of the latter elements can, however, be individually filtered through htmLawed. + + * It cannot transform the non-standard 'embed' elements to the standard-compliant 'object' elements. Yet, it can allow 'embed' elements if permitted ('embed' is widely used and supported). Admins can certainly use the 'hook_tag' parameter (section:- #3.4.9) to deploy a custom embed-to-object converter function. + + * The only non-standard element that may be permitted is 'embed'; others like 'noembed' and 'nobr' cannot be permitted without modifying the htmLawed code. + + * It cannot handle input that has non-HTML code like 'SVG' and 'MathML'. One way around this is to break the input into pieces and pass only those without non-HTML code to htmLawed. Another is described in section:- #3.9. A third way may be to somehow take advantage of the '$config["and_mark"]' parameter (see section:- #3.2). + + * By default, htmLawed won't check many attribute values for standard compliance. E.g., 'width="20m"' with the dimension in non-standard 'm' is let through. Implementing universal and strict attribute value checks can make htmLawed slow and resource-intensive. Admins should look at the 'hook_tag' parameter (section:- #3.4.9) or '$spec' to enforce finer checks. + + * The attributes, deprecated (which can be transformed too) or not, that it supports are largely those that are in the specifications. Only a few of the proprietary attributes are supported. + + * Except for contained URLs and dynamic expressions (also optional), htmLawed does not check CSS style property values. Admins should look at using the 'hook_tag' parameter (section:- #3.4.9) or '$spec' for finer checks. 
Perhaps the best option is to disallow 'style' but allow 'class' attributes with the right 'oneof' or 'match' values for 'class', and have the various class style properties in '.css' CSS stylesheet files. + + * htmLawed does not parse emoticons, decode `BBcode`, or `wikify`, auto-converting text to proper HTML. Similarly, it won't convert line-breaks to 'br' elements. Such functions are beyond its purview. Admins should use other code to pre- or post-process the input for such purposes. + + * htmLawed cannot be used to have links force-opened in new windows (by auto-adding appropriate 'target' and 'onclick' attributes to 'a'). Admins should look at Javascript-based DOM-modifying solutions for this. Admins may also be able to use a custom hook function to enforce such checks ('hook_tag' parameter; see section:- #3.4.9). + + * Nesting-based checks are not possible. E.g., one cannot disallow 'p' elements specifically inside 'td' while permitting them elsewhere. Admins may be able to use a custom hook function to enforce such checks ('hook_tag' parameter; see section:- #3.4.9). + + * Except for optionally converting absolute or relative URLs to the other type, htmLawed will not alter URLs (e.g., to change the value of query strings or to convert 'http' to 'https'). Having absolute URLs may be a standard-requirement, e.g., when HTML is embedded in email messages, whereas altering URLs for other purposes is beyond htmLawed's goals. Admins may be able to use a custom hook function to enforce such checks ('hook_tag' parameter; see section:- #3.4.9). + + * Pairs of opening and closing tags that do not enclose any content (like '<a name="xyz"></a>') are not removed. This may be against the standard specifications for certain elements (e.g., 'table'). However, presence of such non-compliant code will not break the display or layout of content. Admins can also use simple regex-based code to filter out such code. 
+ + * htmLawed does not check for certain element orderings described in the standard specifications (e.g., in a 'table', 'tbody' is allowed before 'tfoot'). Admins may be able to use a custom hook function to enforce such checks ('hook_tag' parameter; see section:- #3.4.9). + + * htmLawed does not check the number of nested elements. E.g., it will allow two 'caption' elements in a 'table' element, illegal as per the specifications. Admins may be able to use a custom hook function to enforce such checks ('hook_tag' parameter; see section:- #3.4.9). + + * htmLawed might convert certain entities to actual characters and remove backslashes and CSS comment-markers ('/*') in 'style' attribute values in order to detect malicious HTML like crafted IE-specific dynamic expressions like 'expression...'. If this is too harsh, admins can allow CSS expressions through htmLawed core but then use a custom function through the 'hook_tag' parameter (section:- #3.4.9) to more specifically identify CSS expressions in the 'style' attribute values. Also, using '$config["style_pass"]', it is possible to have htmLawed pass 'style' attribute values without even looking at them (section:- #3.4.8). + + * htmLawed does not correct certain possible attribute-based security vulnerabilities (e.g., 'x'). These arise when browsers mis-identify markup in `escaped` text, defeating the very purpose of escaping text (a bad browser will read the given example as 'x'). + + * Because of poor Unicode support in PHP, htmLawed does not remove the `high value` HTML-invalid characters with multi-byte code-points. Such characters however are extremely unlikely to be in the input. (see section:- #3.1). + + * htmLawed does not check or correct the character encoding of the input it receives. In conjunction with permitting circumstances such as when the character encoding is left undefined through HTTP headers or HTML 'meta' tags, this can permit an exploit (like Google's `UTF-7/XSS` vulnerability of the past). 
Also, htmLawed can mangle input text if it is not well-formed in terms of character encoding. Administrators can consider using code available elsewhere to check well-formedness of input text characters to correct any defect. + + * htmLawed is expected to work with input texts in ASCII-compatible single byte encodings such as national variants of ASCII (like ISO-646-DE/German of the ISO 646 standard), extended ASCII variants (like ISO 8859-9/Turkish of the ISO 8859/ISO Latin standard), ISO 8859-based Windows variants (like Windows 1252), EBCDIC, Shift JIS (Japanese), GB-Roman (Chinese), and KS-Roman (Korean). It should also properly handle texts with variable byte encodings like UTF-7 (Unicode) and UTF-8 (Unicode). However, htmLawed may mangle input texts with double byte encodings like UTF-16 (Unicode), JIS X 0208:1997 (Japanese) and KS X 1001:1992 (Korean), or the UTF-32 (Unicode) quadruple byte encoding. If an input text has such an encoding, administrators can use PHP's iconv:- http://php.net/manual/en/book.iconv.php functions, or some other means, to convert text to UTF-8 before passing it to htmLawed. + + * Like any script using PHP's PCRE regex functions, PHP setup-specific low PCRE limit values can cause htmLawed to at least partially fail with very long input texts. 
+ + +-- 2.9 Examples of usage -------------------------------------------o + + + Safest, allowing only `safe` HTML markup -- + + $config = array('safe'=>1); + $out = htmLawed($in, $config); + + Simplest, allowing all valid HTML markup except 'javascript:' -- + + $out = htmLawed($in); + + Allowing all valid HTML markup including 'javascript:' -- + + $config = array('schemes'=>'*:*'); + $out = htmLawed($in, $config); + + Allowing only 'safe' HTML and the elements 'a', 'em', and 'strong' -- + + $config = array('safe'=>1, 'elements'=>'a, em, strong'); + $out = htmLawed($in, $config); + + Not allowing elements 'script' and 'object' -- + + $config = array('elements'=>'* -script -object'); + $out = htmLawed($in, $config); + + Not allowing attributes 'id' and 'style' -- + + $config = array('deny_attribute'=>'id, style'); + $out = htmLawed($in, $config); + + Permitting only attributes 'title' and 'href' -- + + $config = array('deny_attribute'=>'* -title -href'); + $out = htmLawed($in, $config); + + Remove bad/disallowed tags altogether instead of converting them to entities -- + + $config = array('keep_bad'=>0); + $out = htmLawed($in, $config); + + Allowing attribute 'title' only in 'a' and not allowing attributes 'id', 'style', or scriptable `on*` attributes like 'onclick' -- + + $config = array('deny_attribute'=>'title, id, style, on*'); + $spec = 'a=title'; + $out = htmLawed($in, $config, $spec); + + Allowing a custom attribute, 'vFlag', in 'img' and permitting custom use of the standard attribute, 'rel', in 'input' -- + + $spec = 'img=vFlag; input=rel'; + $out = htmLawed($in, $config, $spec); + + Some case-studies are presented below. 
+ + *1.* A blog administrator wants to allow only 'a', 'em', 'strike', 'strong' and 'u' in comments, but needs 'strike' and 'u' transformed to 'span' for better XHTML 1-strict compliance, and, he wants the 'a' links to point only to 'http' or 'https' resources: + + $processed = htmLawed($in, array('elements'=>'a, em, strike, strong, u', 'make_tag_strict'=>1, 'safe'=>1, 'schemes'=>'*:http, https'), 'a=href'); + + *2.* An author uses a custom-made web application to load content on his web-site. He is the only one using that application and the content he generates has all types of HTML, including scripts. The web application uses htmLawed primarily as a tool to correct errors that creep in while writing HTML and to take care of the occasional `bad` characters in copy-paste text introduced by Microsoft Office. The web application provides a preview before submitted input is added to the content. For the previewing process, htmLawed is set up as follows: + + $processed = htmLawed($in, array('css_expression'=>1, 'keep_bad'=>1, 'make_tag_strict'=>1, 'schemes'=>'*:*', 'valid_xhtml'=>1)); + + For the final submission process, 'keep_bad' is set to '6'. A value of '1' for the preview process allows the author to note and correct any HTML mistake without losing any of the typed text. + + *3.* A data-miner is scraping information in a specific table of similar web-pages and is collating the data rows, and uses htmLawed to reduce unnecessary markup and white-spaces: + + $processed = htmLawed($in, array('elements'=>'tr, td', 'tidy'=>-1), 'tr, td ='); + + +== 3 Details =====================================================oo + + +-- 3.1 Invalid/dangerous characters -------------------------------- + + + Valid characters (more correctly, their code-points) in HTML or XML are, hexadecimally, '9', 'a', 'd', '20' to 'd7ff', and 'e000' to '10ffff', except 'fffe' and 'ffff' (decimally, '9', '10', '13', '32' to '55295', and '57344' to '1114111', except '65534' and '65535'). 
htmLawed removes the invalid characters '0' to '8', 'b', 'c', and 'e' to '1f'. + + Because of PHP's poor native support for multi-byte characters, htmLawed cannot check for the remaining invalid code-points. However, for various reasons, it is very unlikely for any of those characters to be in the input. + + Characters that are discouraged (see section:- #5.1) but not invalid are not removed by htmLawed. + + It (function 'hl_tag()') also replaces the potentially dangerous (in some Mozilla [Firefox] and Opera browsers) soft-hyphen character (code-point, hexadecimally, 'ad', or decimally, '173') in attribute values with spaces. Where required, the characters '<', '>', '&', and '"' are converted to entities. + + With '$config["clean_ms_char"]' set as '1' or '2', many of the discouraged characters (decimal code-points '127' to '159' except '133') that many Microsoft applications incorrectly use (as per the 'Windows 1252' ['Cp-1252'] or a similar encoding system), and the character for decimal code-point '133', are converted to appropriate decimal numerical entities (or removed for a few cases)-- see appendix in section:- #5.4. This can help avoid some display issues arising from copying-pasting of content. + + With '$config["clean_ms_char"]' set as '2', characters for the hexadecimal code-points '82', '91', and '92' (for special single-quotes), and '84', '93', and '94' (for special double-quotes) are converted to ordinary single and double quotes respectively and not to entities. + + The character values are replaced with entities/characters and not character values referred to by the entities/characters to keep this task independent of the character-encoding of input text. 
+ + The '$config["clean_ms_char"]' parameter should not be used if authors do not copy-paste Microsoft-created text, or if the input text is not believed to use the 'Windows 1252' ('Cp-1252') or a similar encoding like 'Cp-1251' (otherwise, for example when UTF-8 encoding is in use, Japanese or Korean characters can get mangled). Further, the input form and the web-pages displaying it or its content should have the character encoding appropriately marked-up. + + +-- 3.2 Character references/entities ------------------------------o + + + Valid character entities take the form '&*;' where '*' is '#x' followed by a hexadecimal number (hexadecimal numeric entity; like '&#xa0;' for non-breaking space), or alphanumeric like 'gt' (external or named entity; like '&nbsp;' for non-breaking space), or '#' followed by a number (decimal numeric entity; like '&#160;' for non-breaking space). Character entities referring to the soft-hyphen character (the '&shy;' or '\xad' character; hexadecimal code-point 'ad' [decimal '173']) in URL-accepting attribute values are always replaced with spaces; soft-hyphens in attribute values introduce vulnerabilities in some older versions of the Opera and Mozilla [Firefox] browsers. + + htmLawed (function 'hl_ent()'): + + * Neutralizes entities with multiple leading zeroes or missing semi-colons (potentially dangerous) + + * Lowercases the 'X' (for XML-compliance) and 'A-F' of hexadecimal numeric entities + + * Neutralizes entities referring to characters that are HTML-invalid (see section:- #3.1) + + * Neutralizes entities referring to characters that are HTML-discouraged (code-points, hexadecimally, '7f' to '84', '86' to '9f', and 'fdd0' to 'fddf', or decimally, '127' to '132', '134' to '159', and '64976' to '64991'). Entities referring to the remaining discouraged characters (see section:- #5.1 for a full list) are let through. + + * Neutralizes named entities that are not in the specs. 
+ + * Optionally converts valid HTML-specific named entities except '&gt;', '&lt;', '&quot;', and '&amp;' to decimal numeric ones (hexadecimal if $config["hexdec_entity"] is '2') for generic XML-compliance. For this, '$config["named_entity"]' should be '0'. + + * Optionally converts hexadecimal numeric entities to the more widely supported decimal ones. For this, '$config["hexdec_entity"]' should be '0'. + + * Optionally converts decimal numeric entities to the hexadecimal ones. For this, '$config["hexdec_entity"]' should be '2'. + + `Neutralization` refers to the `entitification` of '&' to '&amp;'. + + *Note*: htmLawed does not convert entities to the actual characters represented by them; one can pass the htmLawed output through PHP's 'html_entity_decode' function:- http://www.php.net/html_entity_decode for that. + + *Note*: If '$config["and_mark"]' is set, and set to a value other than '0', then the '&' characters in the original input are replaced with the control character for the hexadecimal code-point '6' ('\x06'; '&' characters introduced by htmLawed, e.g., after converting '<' to '&lt;', are not affected). This allows one to distinguish, say, an '&gt;' introduced by htmLawed and an '&gt;' put in by the input writer, and can be helpful in further processing of the htmLawed-processed text (e.g., to identify the character sequence 'o(&gt;&lt;)o' to generate an emoticon image). When this feature is active, admins should ensure that the htmLawed output is not directly used in web pages or XML documents as the presence of the '\x06' can break documents. Before use in such documents, and preferably before any storage, any remaining '\x06' should be changed back to '&', e.g., with: + + $final = str_replace("\x06", '&', $prelim); + + Also, see section:- #3.9. + + +-- 3.3 HTML elements ----------------------------------------------o + + + htmLawed can be configured to allow only certain HTML elements (tags) in the input. 
Disallowed elements (just tag-content, and not element-content), based on '$config["keep_bad"]', are either `neutralized` (converted to plain text by entitification of '<' and '>') or removed. + + E.g., with only 'em' permitted: + + Input: + + My website is My website is a.com. + + Output, with '$config["keep_bad"]' not '0': + + My website is <a href="">a.com</a>. + + See section:- #3.3.3 for differences between the various non-zero '$config["keep_bad"]' values. + + htmLawed by default permits these 86 elements: + + a, abbr, acronym, address, applet, area, b, bdo, big, blockquote, br, button, caption, center, cite, code, col, colgroup, dd, del, dfn, dir, div, dl, dt, em, embed, fieldset, font, form, h1, h2, h3, h4, h5, h6, hr, i, iframe, img, input, ins, isindex, kbd, label, legend, li, map, menu, noscript, object, ol, optgroup, option, p, param, pre, q, rb, rbc, rp, rt, rtc, ruby, s, samp, script, select, small, span, strike, strong, sub, sup, table, tbody, td, textarea, tfoot, th, thead, tr, tt, u, ul, var + + Except for 'embed' (included because of its wide-spread use) and the Ruby elements ('rb', 'rbc', 'rp', 'rt', 'rtc', 'ruby'; part of XHTML 1.1), these are all the elements in the HTML 4/XHTML 1 specs. Strict-specific specs. exclude 'center', 'dir', 'font', 'isindex', 'menu', 's', 'strike', and 'u'. + + With '$config["safe"] = 1', the default set will exclude 'applet', 'embed', 'iframe', 'object' and 'script'; see section:- #3.6. + + When '$config["elements"]', which specifies allowed elements, is `properly` defined, and neither empty nor set to '0' or '*', the default set is not used. To have elements added to or removed from the default set, a '+/-' notation is used. E.g., '*-script-object' implies that only 'script' and 'object' are disallowed, whereas '*+embed' means that 'noembed' is also allowed. Elements can also be specified as comma separated names. E.g., 'a, b, i' means only 'a', 'b' and 'i' are permitted. 
In this notation, '*', '+' and '-' have no significance and can actually cause a mis-reading. + + Some more examples of '$config["elements"]' values indicating permitted elements (note that empty spaces are liberally allowed for clarity): + + * 'a, blockquote, code, em, strong' -- only 'a', 'blockquote', 'code', 'em', and 'strong' + * '*-script' -- all excluding 'script' + * '* -center -dir -font -isindex -menu -s -strike -u' -- only XHTML-Strict elements + * '*+noembed-script' -- all including 'noembed' excluding 'script' + + Some mis-usages (and the resulting permitted elements) that can be avoided: + + * '-*' -- none; instead of htmLawed, one might just use, e.g., the 'htmlspecialchars()' PHP function + * '*, -script' -- all except 'script'; admin probably meant '*-script' + * '-*, a, em, strong' -- all; admin probably meant 'a, em, strong' + * '*' -- all; admin need not have set 'elements' + * '*-form+form' -- all; a '+' will always over-ride any '-' + * '*, noembed' -- only 'noembed'; admin probably meant '*+noembed' + * 'a, +b, i' -- only 'a' and 'i'; admin probably meant 'a, b, i' + + Basically, when using the '+/-' notation, commas (',') should not be used, and vice versa, and '*' should be used with the former but not the latter. + + *Note*: Even if an element that is not in the default set is allowed through '$config["elements"]', like 'noembed' in the last example, it will eventually be removed during tag balancing unless such balancing is turned off ('$config["balance"]' set to '0'). Currently, the only way around this, which actually is simple, is to edit the various arrays in the function 'hl_bal()' to accommodate the element and its nesting properties. + + *A possibly second way to specify allowed elements* is to set '$config["parent"]' to an element name that supposedly will hold the input, and to set '$config["balance"]' to '1'. 
During tag balancing (see section:- #3.3.3), all elements that cannot legally nest inside the parent element will be removed. The parent element is auto-reset to 'div' if '$config["parent"]' is empty, 'body', or an element not in htmLawed's default set of 86 elements. + + `Tag transformation` is possible for improving XHTML-Strict compliance -- most of the deprecated elements are removed or converted to valid XHTML-Strict ones; see section:- #3.3.2. + + +.. 3.3.1 Handling of comments and CDATA sections ................... + + + 'CDATA' sections have the format '"...]]>', and HTML comments, '"... -->'. Neither HTML comments nor 'CDATA' sections can reside inside tags. HTML comments can exist anywhere else, but 'CDATA' sections can exist only where plain text is allowed (e.g., immediately inside 'td' element content but not immediately inside 'tr' element content). + + htmLawed (function 'hl_cmtcd()') handles HTML comments or 'CDATA' sections depending on the values of '$config["comment"]' or '$config["cdata"]'. If '0', such markup is not looked for and the text is processed like plain text. If '1', it is removed completely. If '2', it is preserved but any '<', '>' and '&' inside are changed to entities. If '3', they are left as such. + + Note that for the last two cases, HTML comments and 'CDATA' sections will always be removed from tag content (function 'hl_tag()'). + + Examples: + + Input: + Home + Output ('$config["comment"] = 0, $config["cdata"] = 2'): + <-- home link -->Home + Output ('$config["comment"] = 1, $config["cdata"] = 2'): + Home + Output ('$config["comment"] = 2, $config["cdata"] = 2'): + Home + Output ('$config["comment"] = 2, $config["cdata"] = 1'): + Home + Output ('$config["comment"] = 3, $config["cdata"] = 3'): + Home + + For standard-compliance, comments are given the form '', and any '--' in the content is made '-'. 
+ + When '$config["safe"] = 1', CDATA sections and comments are considered plain text unless '$config["comment"]' or '$config["cdata"]' is explicitly specified; see section:- #3.6. + + +.. 3.3.2 Tag-transformation for better XHTML-Strict ................o + + + If '$config["make_tag_strict"]' is set and not '0', following non-XHTML-Strict elements (and attributes), even if admin-permitted, are mutated as indicated (element content remains intact; function 'hl_tag2()'): + + * applet - (based on '$config["make_tag_strict"]', unchanged ('1') or removed ('2')) + * center - 'div style="text-align: center;"' + * dir - 'ul' + * embed - (based on '$config["make_tag_strict"]', unchanged ('1') or removed ('2')) + * font (face, size, color) - 'span style="font-family: ; font-size: ; color: ;"' (size transformation reference:- http://style.cleverchimp.com/font_size_intervals/altintervals.html) + * isindex - (based on '$config["make_tag_strict"]', unchanged ('1') or removed ('2')) + * menu - 'ul' + * s - 'span style="text-decoration: line-through;"' + * strike - 'span style="text-decoration: line-through;"' + * u - 'span style="text-decoration: underline;"' + + For an element with a pre-existing 'style' attribute value, the extra style properties are appended. + + Example input: + +
    + The PHP software script used for this web-page web-page is htmLawedTest.php, from PHP Labware. +
    + + The output: + +
    + The PHP software script used for this web-page web-page is htmLawedTest.php, from PHP Labware. +
    + + +-- 3.3.3 Tag balancing and proper nesting -------------------------o + + + If '$config["balance"]' is set to '1', htmLawed (function 'hl_bal()') checks and corrects the input to have properly balanced tags and legal element content (i.e., any element nesting should be valid, and plain text may be present only in the content of elements that allow them). + + Depending on the value of '$config["keep_bad"]' (see section:- #2.2 and section:- #3.3), illegal content may be removed or neutralized to plain text by converting < and > to entities: + + '0' - remove; this option is available only to maintain Kses-compatibility and should not be used otherwise (see section:- #2.6) + '1' - neutralize tags and keep element content + '2' - remove tags but keep element content + '3' and '4' - like '1' and '2', but keep element content only if text ('pcdata') is valid in parent element as per specs + '5' and '6' - like '3' and '4', but line-breaks, tabs and spaces are left + + Example input (disallowing the 'p' element): + + <*> Pseudo-tags <*> + Non-HTML tag xml +

    + Disallowed tag p +

    +
      Bad
    • OK
    + + The output with '$config["keep_bad"] = 1': + + <*> Pseudo-tags <*> + <xml>Non-HTML tag xml</xml> + <p> + Disallowed tag p + </p> +
      Bad
    • OK
    + + The output with '$config["keep_bad"] = 3': + + <*> Pseudo-tags <*> + <xml>Non-HTML tag xml</xml> + <p> + Disallowed tag p + </p> +
    • OK
    + + The output with '$config["keep_bad"] = 6': + + <*> Pseudo-tags <*> + Non-HTML tag xml + + Disallowed tag p + +
    • OK
    + + An option like '1' is useful, e.g., when a writer previews his submission, whereas one like '3' is useful before content is finalized and made available to all. + + *Note:* In the example above, unlike '<*>', '' gets considered as a tag (even though there is no HTML element named 'xml'). Thus, the 'keep_bad' parameter's value affects '' but not '<*>'. In general, text matching the regular expression pattern '<(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>' is considered a tag (phrase enclosed by the angled brackets '<' and '>', and starting [with an optional slash preceding] with an alphanumeric word that starts with an alphabet...), and is subjected to the 'keep_bad' value. + + Nesting/content rules for each of the 86 elements in htmLawed's default set (see section:- #3.3) are defined in function 'hl_bal()'. This means that if a non-standard element besides 'embed' is being permitted through '$config["elements"]', the element's tag content will end up getting removed if '$config["balance"]' is set to '1'. + + Plain text and/or certain elements nested inside 'blockquote', 'form', 'map' and 'noscript' need to be in block-level elements. This point is often missed during manual writing of HTML code. htmLawed attempts to address this during balancing. E.g., if the parent container is set as 'form', the input 'B:C:' is converted to '
    B:C:
    '. + + +-- 3.3.4 Elements requiring child elements ------------------------o + + + As per specs, the following elements require legal child elements nested inside them: + + blockquote, dir, dl, form, map, menu, noscript, ol, optgroup, rbc, rtc, ruby, select, table, tbody, tfoot, thead, tr, ul + + In some cases, the specs stipulate the number and/or the ordering of the child elements. A 'table' can have 0 or 1 'caption', 'tbody', 'tfoot', and 'thead', but they must be in this order: 'caption', 'thead', 'tfoot', 'tbody'. + + htmLawed currently does not check for conformance to these rules. Note that any non-compliance in this regard will not introduce security vulnerabilities, crash browser applications, or affect the rendering of web-pages. + + With '$config["direct_list_nest"]' set to '1', htmLawed will allow direct nesting of an 'ol' or 'ul' list within another 'ol' or 'ul' without requiring the child list to be within an 'li' of the parent list. While this is not standard-compliant, directly nested lists are rendered properly by almost all browsers. The parameter '$config["direct_list_nest"]' has no effect if tag-balancing (section:- #3.3.3) is turned off. + + +-- 3.3.5 Beautify or compact HTML ---------------------------------o + + + By default, htmLawed will neither `beautify` HTML code by formatting it with indentations, etc., nor will it make it compact by removing un-needed white-space.(It does always properly white-space tag content.) + + As per the HTML standards, spaces, tabs and line-breaks in web-pages (except those inside 'pre' elements) are all considered equivalent, and referred to as `white-spaces`. Browser applications are supposed to consider contiguous white-spaces as just a single space, and to disregard white-spaces trailing opening tags or preceding closing tags. This white-space `normalization` allows the use of text/code beautifully formatted with indentations and line-spacings for readability. 
Such `pretty` HTML can, however, increase the size of web-pages, or make the extraction or scraping of plain text cumbersome. + + With the '$config' parameter 'tidy', htmLawed can be used to beautify or compact the input text. Input with just plain text and no HTML markup is also subject to this. Besides 'pre', the 'script' and 'textarea' elements, CDATA sections, and HTML comments are not subjected to the tidying process. + + To `compact`, use '$config["tidy"] = -1'; single instances or runs of white-spaces are replaced with a single space, and white-spaces trailing and leading open and closing tags, respectively, are removed. + + To `beautify`, '$config["tidy"]' is set as '1', or for customized tidying, as a string like '2s2n'. The 's' or 't' character specifies the use of spaces or tabs for indentation. The first and third characters, any of the digits 0-9, specify the number of spaces or tabs per indentation, and any parental lead spacing (extra indenting of the whole block of input text). The 'r' and 'n' characters are used to specify line-break characters: 'n' for '\n' (Unix/Mac OS X line-breaks), 'rn' or 'nr' for '\r\n' (Windows/DOS line-breaks), or 'r' for '\r'. + + The '$config["tidy"]' value of '1' is equivalent to '2s0n'. Other '$config["tidy"]' values are read loosely: a value of '4' is equivalent to '4s0n'; 't2', to '1t2n'; 's', to '2s0n'; '2TR', to '2t0r'; 'T1', to '1t1n'; 'nr3', to '3s0nr', and so on. Except in the indentations and line-spacings, runs of white-spaces are replaced with a single space during beautification. + + Input formatting using '$config["tidy"]' is not recommended when input text has mixed markup (like HTML + PHP). + + +-- 3.4 Attributes ------------------------------------------------oo + + + htmLawed will only permit attributes described in the HTML specs (including deprecated ones). 
It also permits some attributes for use with the 'embed' element (the non-standard 'embed' element is supported in htmLawed because of its widespread use), and the 'xml:space' attribute (valid only in XHTML 1.1). A list of such 111 attributes and the elements they are allowed in is in section:- #5.2. Using the '$spec' argument, htmLawed can be forced to permit custom, non-standard attributes as well as custom rules for standard attributes (section:- #2.3). + + When '$config["deny_attribute"]' is not set, or set to '0', or empty ('""'), all the 111 attributes are permitted. Otherwise, '$config["deny_attribute"]' can be set as a list of comma-separated names of the denied attributes. 'on*' can be used to refer to the group of potentially dangerous, script-accepting attributes: 'onblur', 'onchange', 'onclick', 'ondblclick', 'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onreset', 'onselect' and 'onsubmit'. + + Note that attributes specified in '$config["deny_attribute"]' are denied globally, for all elements. To deny attributes for only specific elements, '$spec' (see section:- #2.3) can be used. '$spec' can also be used to element-specifically permit an attribute otherwise denied through '$config["deny_attribute"]'. + + With '$config["safe"] = 1' (section:- #3.6), the 'on*' attributes are automatically disallowed. + + *Note*: To deny all but a few attributes globally, a simpler way to specify '$config["deny_attribute"]' would be to use the notation '* -attribute1 -attribute2 ...'. Thus, a value of '* -title -href' implies that except 'href' and 'title' (where allowed as per standards) all other attributes are to be removed. With this notation, the value for the parameter 'safe' (section:- #3.6) will have no effect on 'deny_attribute'.
+ + htmLawed (function 'hl_tag()') also: + + * Lower-cases attribute names + * Removes duplicate attributes (last one stays) + * Gives attributes the form 'name="value"' and single-spaces them, removing unnecessary white-spacing + * Provides `required` attributes (see section:- #3.4.1) + * Double-quotes values and escapes any '"' inside them + * Replaces the possibly dangerous soft-hyphen characters (hexadecimal code-point 'ad') in the values with spaces + * Allows custom function to additionally filter/modify attribute values (see section:- #3.4.9) + + +.. 3.4.1 Auto-addition of XHTML-required attributes ................ + + + If indicated attributes for the following elements are found missing, htmLawed (function 'hl_tag()') will add them (with values same as attribute names unless indicated otherwise below): + + * area - alt ('area') + * area, img - src, alt ('image') + * bdo - dir ('ltr') + * form - action + * map - name + * optgroup - label + * param - name + * script - type ('text/javascript') + * textarea - rows ('10'), cols ('50') + + Additionally, with '$config["xml:lang"]' set to '1' or '2', if the 'lang' but not the 'xml:lang' attribute is declared, then the latter is added too, with a value copied from that of 'lang'. This is for better standard-compliance. With '$config["xml:lang"]' set to '2', the 'lang' attribute is removed (XHTML 1.1 specs). + + Note that the 'name' attribute for 'map', invalid in XHTML 1.1, is also transformed if required -- see section:- #3.4.6. + + +.. 3.4.2 Duplicate/invalid 'id' values ............................o + + + If '$config["unique_ids"]' is '1', htmLawed (function 'hl_tag()') removes 'id' attributes with values that are not XHTML-compliant (must begin with a letter and can contain letters, digits, ':', '.', '-' and '_') or duplicate. If '$config["unique_ids"]' is a word, any duplicate but otherwise valid value will be appropriately prefixed with the word to ensure its uniqueness. 
The word should begin with a letter and should contain only letters, numbers, ':', '.', '_' and '-'. + + Even if multiple inputs need to be filtered (through multiple calls to htmLawed), htmLawed ensures uniqueness of 'id' values as it uses a global variable ('$GLOBALS["hl_Ids"]' array). Further, an admin can restrict the use of certain 'id' values by presetting this variable before htmLawed is called into use. E.g.: + + $GLOBALS['hl_Ids'] = array('top'=>1, 'bottom'=>1, 'myform'=>1); // id values not allowed in input + $processed = htmLawed($text); // filter input + + +.. 3.4.3 URL schemes (protocols) and scripts in attribute values ............o + + + htmLawed edits attributes that take URLs as values if they are found to contain un-permitted schemes. E.g., if the 'afp' scheme is not permitted, then '' becomes '', and if Javascript is not permitted '' becomes ''. + + By default htmLawed permits these schemes in URLs for the 'href' attribute: + + aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet + + Also, only 'file', 'http' and 'https' are permitted in attributes whose names start with 'o' (like 'onmouseover'), and in these attributes that accept URLs: + + action, cite, classid, codebase, data, href, longdesc, model, pluginspage, pluginurl, src, style, usemap + + These default sets are used when '$config["schemes"]' is not set (see section:- #2.2). To over-ride the defaults, '$config["schemes"]' is defined as a string of semi-colon-separated sub-strings of type 'attribute: comma-separated schemes'. E.g., 'href: mailto, http, https; onclick: javascript; src: http, https'. For unspecified attributes, 'file', 'http' and 'https' are permitted. This can be changed by passing schemes for '*' in '$config["schemes"]'. E.g., 'href: mailto, http, https; *: https, https'. + + '*' can be put in the list of schemes to permit all protocols. E.g., 'style: *; img: http, https' results in protocols not being checked in 'style' attribute values. 
However, in such cases, any relative-to-absolute URL conversion, or vice versa, (section:- #3.4.4) is not done. + + Thus, `to allow Javascript`, one can set '$config["schemes"]' as 'href: mailto, http, https; *: http, https, javascript', or 'href: mailto, http, https, javascript; *: http, https, javascript', or '*: *', and so on. + + As a side-note, one may find 'style: *' useful as URLs in 'style' attributes can be specified in a variety of ways, and the patterns that htmLawed uses to identify URLs may mistakenly identify non-URL text. + + '!' can be put in the list of schemes to disallow all protocols as well as `local` URLs. Thus, with 'href: http, style: !', 'CNN' will become 'CNN'. + + *Note*: If URL-accepting attributes other than those listed above are being allowed, then the scheme will not be checked unless the attribute name contains the string 'src' (e.g., 'dynsrc') or starts with 'o' (e.g., 'onbeforecopy'). + + With '$config["safe"] = 1', all URLs are disallowed in the 'style' attribute values. + + +.. 3.4.4 Absolute & relative URLs in attribute values .............o + + + htmLawed can make absolute URLs in attributes like 'href' relative ('$config["abs_url"]' is '-1'), and vice versa ('$config["abs_url"]' is '1'). URLs in scripts are not considered for this, and so are URLs like '#section_6' (fragment), '?name=Tim#show' (starting with query string), and ';var=1?name=Tim#show' (starting with parameters). Further, this requires that '$config["base_url"]' be set properly, with the '://' and a trailing slash ('/'), with no query string, etc. E.g., 'file:///D:/page/', 'https://abc.com/x/y/', or 'http://localhost/demo/' are okay, but 'file:///D:/page/?help=1', 'abc.com/x/y/' and 'http://localhost/demo/index.htm' are not. + + For making absolute URLs relative, only those URLs that have the '$config["base_url"]' string at the beginning are converted. 
E.g., with '$config["base_url"] = "https://abc.com/x/y/"', 'https://abc.com/x/y/a.gif' and 'https://abc.com/x/y/z/b.gif' become 'a.gif' and 'z/b.gif' respectively, while 'https://abc.com/x/c.gif' is not changed. + + When making relative URLs absolute, only the scheme, network location (host-name) and path values in the base URL are inherited. See section:- #5.5 for more about the URL specification as per RFC 1808:- http://www.ietf.org/rfc/rfc1808.txt. + + +.. 3.4.5 Lower-cased, standard attribute values ....................o + + + Optionally, for standard-compliance, htmLawed (function 'hl_tag()') lower-cases standard attribute values to give, e.g., 'input type="password"' instead of 'input type="Password"', if '$config["lc_std_val"]' is '1'. Attribute values matching those listed below for any of the elements (plus those for the 'type' attribute of 'button' or 'input') are lower-cased: + + all, baseline, bottom, button, center, char, checkbox, circle, col, colgroup, cols, data, default, file, get, groups, hidden, image, justify, left, ltr, middle, none, object, password, poly, post, preserve, radio, rect, ref, reset, right, row, rowgroup, rows, rtl, submit, text, top + + a, area, bdo, button, col, form, img, input, object, option, optgroup, param, script, select, table, td, tfoot, th, thead, tr, xml:space + + The following `empty` (`minimized`) attributes are always assigned lower-cased values (same as the names): + + checked, compact, declare, defer, disabled, ismap, multiple, nohref, noresize, noshade, nowrap, readonly, selected + + +.. 3.4.6 Transformation of deprecated attributes ..................o + + + If '$config["no_deprecated_attr"]' is not '0', then deprecated attributes (see appendix in section:- #5.2) are removed and, in most cases, their values are transformed to CSS style properties and added to the 'style' attributes (function 'hl_tag()').
Except for 'bordercolor' for 'table', 'tr' and 'td', the scores of proprietary attributes that were never part of any cross-browser standard are not supported. + + *Note*: The attribute 'target' for 'a' is allowed even though it is not in XHTML 1.0 specs. This is because of the attribute's wide-spread use and browser-support, and because the attribute is valid in XHTML 1.1 onwards. + + * align - for 'img' with value of 'left' or 'right', becomes, e.g., 'float: left'; for 'div' and 'table' with value 'center', becomes 'margin: auto'; all others become, e.g., 'text-align: right' + + * bgcolor - E.g., 'bgcolor="#ffffff"' becomes 'background-color: #ffffff' + * border - E.g., 'border="2"' becomes 'border: 2px' + * bordercolor - E.g., 'bordercolor=#999999' becomes 'border-color: #999999;' + * compact - 'font-size: 85%' + * clear - E.g., 'clear="all"' becomes 'clear: both' + + * height - E.g., 'height= "10"' becomes 'height: 10px' and 'height="*"' becomes 'height: auto' + + * hspace - E.g., 'hspace="10"' becomes 'margin-left: 10px; margin-right: 10px' + * language - 'language="VBScript"' becomes 'type="text/vbscript"' + * name - E.g., 'name="xx"' becomes 'id="xx"' + * noshade - 'border-style: none; border: 0; background-color: gray; color: gray' + * nowrap - 'white-space: nowrap' + * size - E.g., 'size="10"' becomes 'height: 10px' + * start - removed + * type - E.g., 'type="i"' becomes 'list-style-type: lower-roman' + * value - removed + * vspace - E.g., 'vspace="10"' becomes 'margin-top: 10px; margin-bottom: 10px' + * width - like 'height' + + Example input: + + imageimage +
    +
    + image + + + + + +
    +
    +

    Section

    +

    Para

    +
    1. First item
    +
    +
    +
    1. First item
    +
    +
    + + And the output with '$config["no_deprecated_attr"] = 1': + + imageimage +
    +
    + image + + + + + +
    +
    +

    Section

    +

    Para

    +
    1. First item
    +
    +
    +
    1. First item
    +
    +
    + + For 'lang', deprecated in XHTML 1.1, transformation is taken care of through '$config["xml:lang"]'; see section:- #3.4.1. + + The attribute 'name' is deprecated in 'form', 'iframe', and 'img', and is replaced with 'id' if an 'id' attribute doesn't exist and if the 'name' value is appropriate for 'id'. For such replacements for 'a' and 'map', for which the 'name' attribute is deprecated in XHTML 1.1, '$config["no_deprecated_attr"]' should be set to '2' (when set to '1', for these two elements, the 'name' attribute is retained). + + +-- 3.4.7 Anti-spam & 'href' ---------------------------------------o + + + htmLawed (function 'hl_tag()') can check the 'href' attribute values (link addresses) as an anti-spam (email or link spam) measure. + + If '$config["anti_mail_spam"]' is not '0', the '@' of email addresses in 'href' values like 'mailto:a@b.com' is replaced with text specified by '$config["anti_mail_spam"]'. The text should be of a form that makes it clear to others that the address needs to be edited before a mail is sent; e.g., '@' (makes the example address 'a@b.com'). + + For regular links, one can choose to have a 'rel' attribute with 'nofollow' in its value (which tells some search engines to not follow a link). This can discourage link spammers. Additionally, or as an alternative, one can choose to empty the 'href' value altogether (disable the link). + + For use of these options, '$config["anti_link_spam"]' should be set as an array with values 'regex1' and 'regex2', both or one of which can be empty (like 'array("", "regex2")') to indicate that that option is not to be used. Otherwise, 'regex1' or 'regex2' should be PHP- and PCRE-compatible regular expression patterns: 'href' values will be matched against them and those matching the pattern will accordingly be treated. + + Note that the regular expressions should have `delimiters`, and be well-formed and preferably fast. Absolute efficiency/accuracy is often not needed. 
+ + An example, to have a 'rel' attribute with 'nofollow' for all links, and to disable links that do not point to domains 'abc.com' and 'xyz.org': + + $config["anti_link_spam"] = array('`.`', '`://\W*(?!(abc\.com|xyz\.org))`'); + + +-- 3.4.8 Inline style properties ----------------------------------o + + + htmLawed can check URL schemes and dynamic expressions (to guard against Javascript, etc., script-based insecurities) in inline CSS style property values in the 'style' attributes. (CSS properties like 'background-image' that accept URLs in their values are noted in section:- #5.3.) Dynamic CSS expressions that allow scripting in the IE browser, and can be a vulnerability, can be removed from property values by setting '$config["css_expression"]' to '1' (default setting). Note that when '$config["css_expression"]' is set to '1', htmLawed will remove '/*' from the 'style' values. + + *Note*: Because of the various ways of representing characters in attribute values (URL-escapement, entitification, etc.), htmLawed might alter the values of the 'style' attribute values, and may even falsely identify dynamic CSS expressions and URL schemes in them. If this is an important issue, checking of URLs and dynamic expressions can be turned off ('$config["schemes"] = "...style:*..."', see section:- #3.4.3, and '$config["css_expression"] = 0'). Alternately, admins can use their own custom function for finer handling of 'style' values through the 'hook_tag' parameter (see section:- #3.4.9). + + It is also possible to have htmLawed let through any 'style' value by setting '$config["style_pass"]' to '1'. + + As such, it is better to set up a CSS file with class declarations, disallow the 'style' attribute, set a '$spec' rule (see section:- #2.3) for 'class' for the 'oneof' or 'match' parameter, and ask writers to make use of the 'class' attribute. 
+ + +-- 3.4.9 Hook function for tag content ----------------------------o + + + It is possible to utilize a custom hook function to alter the tag content htmLawed has finalized (i.e., after it has checked/corrected for required attributes, transformed attributes, lower-cased attribute names, etc.). + + When '$config' parameter 'hook_tag' is set to the name of a function, htmLawed (function 'hl_tag()') will pass on the element name, and, in the case of an opening tag, the `finalized` attribute name-value pairs as array elements to the function. The function, after completing a task such as filtering or tag transformation, will typically return an empty string, the full opening tag string like '' (for empty elements like 'img' and 'input', the element-closing slash '/' should also be included), etc. + + Any 'hook_tag' function, since htmLawed version 1.1.11, also receives names of elements in closing tags, such as 'a' in the closing '' tag of the element 'CNN'. Unlike for opening tags, no other value (i.e., the attribute name-value array) is passed to the function since a closing tag contains only element names. Typically, the function will return an empty string or a full closing tag (like ''). + + This is a *powerful functionality* that can be exploited for various objectives: consolidate-and-convert inline 'style' attributes to 'class', convert 'embed' elements to 'object', permit only one 'caption' element in a 'table' element, disallow embedding of certain types of media, *inject HTML*, use CSSTidy:- http://csstidy.sourceforge.net to sanitize 'style' attribute values, etc. 
+ + As an example, the custom hook code below can be used to force a series of specifically ordered 'id' attributes on all elements, and a specific 'param' element inside all 'object' elements: + + function my_tag_function($element, $attribute_array=0){ + + // If second argument is not received, it means a closing tag is being handled + if(is_numeric($attribute_array)){ + return "</$element>"; + } + + static $id = 0; + // Remove any duplicate element + if($element == 'param' && isset($attribute_array['allowscriptaccess'])){ + return ''; + } + + $new_element = ''; + + // Force a serialized ID number + $attribute_array['id'] = 'my_'. $id; + ++$id; + + // Inject param for allowscriptaccess + if($element == 'object'){ + $new_element = '<param id="my_'. $id. '" allowscriptaccess="never" />'; + ++$id; + } + + $string = ''; + foreach($attribute_array as $k=>$v){ + $string .= " {$k}=\"{$v}\""; + } + + static $empty_elements = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); + + return "<{$element}{$string}". (isset($empty_elements[$element]) ? ' /' : ''). '>'. $new_element; + } + + The 'hook_tag' parameter is different from the 'hook' parameter (section:- #3.7). + + Snippets of hook function code developed by others may be available on the htmLawed:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed website. + + +-- 3.5 Simple configuration directive for most valid XHTML -------oo + + + If '$config["valid_xhtml"]' is set to '1', some relevant '$config' parameters (indicated by '~' in section:- #2.2) are auto-adjusted. This allows one to pass the '$config' argument with a simpler value. If a value for a parameter auto-set through 'valid_xhtml' is still manually provided, then that value will over-ride the auto-set value.
+ + +-- 3.6 Simple configuration directive for most `safe` HTML --------o + + + `Safe` HTML refers to HTML that is restricted to reduce the vulnerability for scripting attacks (such as XSS) based on HTML code which otherwise may still be legal and compliant with the HTML standard specs. When elements such as 'script' and 'object', and attributes such as 'onmouseover' and 'style' are allowed in the input text, an input writer can introduce malevolent HTML code. Note that what is considered 'safe' depends on the nature of the web application and the trust-level accorded to its users. + + htmLawed allows an admin to use '$config["safe"]' to auto-adjust multiple '$config' parameters (such as 'elements' which declares the allowed element-set), which otherwise would have to be manually set. The relevant parameters are indicated by '"' in section:- #2.2). Thus, one can pass the '$config' argument with a simpler value. + + With the value of '1', htmLawed considers 'CDATA' sections and HTML comments as plain text, and prohibits the 'applet', 'embed', 'iframe', 'object' and 'script' elements, and the 'on*' attributes like 'onclick'. ( There are '$config' parameters like 'css_expression' that are not affected by the value set for 'safe' but whose default values still contribute towards a more `safe` output.) Further, URLs with schemes (see section:- #3.4.3) are neutralized so that, e.g., 'style="moz-binding:url(http://danger)"' becomes 'style="moz-binding:url(denied:http://danger)"'. + + Admins, however, may still want to completely deny the 'style' attribute, e.g., with code like + + $processed = htmLawed($text, array('safe'=>1, 'deny_attribute'=>'style')); + + Permitting the 'style' attribute brings in risks of `click-jacking`, etc. CSS property values can render a page non-functional or be used to deface it. Except for URLs, dynamic expressions, and some other things, htmLawed does not completely check 'style' values. 
It does provide ways for the code-developer implementing htmLawed to do such checks through the '$spec' argument, and through the 'hook_tag' parameter (see section:- #3.4.8 for more). Disallowing style completely and relying on CSS classes and stylesheet files is recommended. + + If a value for a parameter auto-set through 'safe' is still manually provided, then that value can over-ride the auto-set value. E.g., with '$config["safe"] = 1' and '$config["elements"] = "*+script"', 'script', but not 'applet', is allowed. + + A page illustrating the efficacy of htmLawed's anti-XSS abilities with 'safe' set to '1' against XSS vectors listed by RSnake:- http://ha.ckers.org/xss.html may be available here:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/rsnake/RSnakeXSSTest.htm. + + +-- 3.7 Using a hook function --------------------------------------o + + + If '$config["hook"]' is not set to '0', then htmLawed will allow preliminarily processed input to be altered by a hook function named by '$config["hook"]' before starting the main work (but after handling of characters, entities, HTML comments and 'CDATA' sections -- see code for function 'htmLawed()'). + + The hook function also allows one to alter the `finalized` values of '$config' and '$spec'. + + Note that the 'hook' parameter is different from the 'hook_tag' parameter (section:- #3.4.9). + + Snippets of hook function code developed by others may be available on the htmLawed:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed website. + + +-- 3.8 Obtaining `finalized` parameter values ---------------------o + + + htmLawed can assign the `finalized` '$config' and '$spec' values to a variable named by '$config["show_setting"]'. 
The variable, made global by htmLawed, is set as an array with three keys: 'config', with the '$config' value, 'spec', with the '$spec' value, and 'time', with a value that is the Unix time (the output of PHP's 'microtime()' function) when the value was assigned. Admins should use a PHP-compliant variable name (e.g., one that does not begin with a numerical digit) that does not conflict with variable names in their non-htmLawed code. + + The values, which are also post-hook function (if any), can be used to auto-generate information (on, e.g., the elements that are permitted) for input writers. + + +-- 3.9 Retaining non-HTML tags in input with mixed markup ---------o + + + htmLawed does not remove certain characters that, though valid, are nevertheless `discouraged` in HTML documents as per the specifications (see section:- #5.1). This can be utilized to deal with input that contains mixed markup. Input that may have HTML markup as well as some other markup that is based on the '<', '>' and '&' characters is considered to have mixed markup. The non-HTML markup can be rather proprietary (like markup for emoticons/smileys), or standard (like MathML or SVG). Or it can be programming code meant for execution/evaluation (such as embedded PHP code). + + To deal with such mixed markup, the input text can be pre-processed to hide the non-HTML markup by specifically replacing the '<', '>' and '&' characters with some of the HTML-discouraged characters (see section:- #3.1.2). Post-htmLawed processing, the replacements are reverted. + + An example (mixed HTML and PHP code in input text): + + $text = preg_replace('`<\?php(.+?)\?>`sm', "\x83?php\\1?\x84", $text); + $processed = htmLawed($text); + $processed = preg_replace('`\x83\?php(.+?)\?\x84`sm', '<?php$1?>', $processed); + + This code will not work if '$config["clean_ms_char"]' is set to '1' (section:- #3.1), in which case one should instead deploy a hook function (section:- #3.7). 
(htmLawed internally uses certain control characters, code-points '1' to '7', and use of these characters as markers in the logic of hook functions may cause issues.) + + Admins may also be able to use '$config["and_mark"]' to deal with such mixed markup; see section:- #3.2. + + +== 4 Other =======================================================oo + + +-- 4.1 Support ----------------------------------------------------- + + + A careful reading of this documentation may provide an answer. + + Software updates and forum-based community-support may be found at http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed. For general PHP issues (not htmLawed-specific), support may be found through internet searches and at http://php.net. + + +-- 4.2 Known issues -----------------------------------------------o + + + See section:- #2.8. + + +-- 4.3 Change-log -------------------------------------------------o + + + (The release date for the downloadable package of files containing documentation, demo script, test-cases, etc., besides the 'htmLawed.php' file, may be updated without a change-log entry if the secondary files, but not htmLawed per se, are revised.) + + `Version number - Release date. Notes` + + 1.1.16 - 29 August 2013. Fix for a potential security vulnerability arising from specially encoded space characters in URL schemes/protocols + + 1.1.15 - 11 August 2013. Improved tidying/prettifying functionality + + 1.1.14 - 8 August 2012. Fix for possible segmental loss of incremental indentation during 'tidying' when 'balance' is disabled; fix for non-effectuation under some circumstances of a corrective behavior to preserve plain text within elements like 'blockquote'. + + 1.1.13 - 22 July 2012. Added feature allowing use of custom, non-standard attributes or custom rules for standard attributes + + 1.1.12 - 5 July 2012. Fix for a bug in identifying an unquoted value of the 'face' attribute + + 1.1.11 - 5 June 2012. 
Fix for possible problem with handling of multi-byte characters in attribute values in an mbstring.func_overload environment. '$config["hook_tag"]', if specified, now receives names of elements in closing tags. + + 1.1.10 - 22 October 2011. Fix for a bug in the 'tidy' functionality that caused the entire input to be replaced with a single space; new parameter, '$config["direct_list_nest"]' to allow direct descendance of a list in a list. (5 April 2012. Dual licensing from LGPLv3 to LGPLv3 and GPLv2+.) + + 1.1.9.5 - 6 July 2011. Minor correction of a rule for nesting of 'li' within 'dir' + + 1.1.9.4 - 3 July 2010. Parameter 'schemes' now accepts '!' so any URL, even a local one, can be `denied`. An issue in which a second URL value in 'style' properties was not checked was fixed. + + 1.1.9.3 - 17 May 2010. Checks for correct nesting of 'param' + + 1.1.9.2 - 26 April 2010. Minor fix regarding rendering of denied URL schemes + + 1.1.9.1 - 26 February 2010. htmLawed now uses the LGPL version 3 license; support for 'flashvars' attribute for 'embed' + + 1.1.9 - 22 December 2009. Soft-hyphens are now removed only from URL-accepting attribute values + + 1.1.8.1 - 16 July 2009. Minor code-change to fix a PHP error notice + + 1.1.8 - 23 April 2009. Parameter 'deny_attribute' now accepts the wild-card '*', making it simpler to specify its value when all but a few attributes are being denied; fixed a bug in interpreting '$spec' + + 1.1.7 - 11-12 March 2009. Attributes globally denied through 'deny_attribute' can be allowed element-specifically through '$spec'; '$config["style_pass"]' allowing letting through any 'style' value introduced; altered logic to catch certain types of dynamic crafted CSS expressions + + 1.1.3-6 - 28-31 January - 4 February 2009. Altered logic to catch certain types of dynamic crafted CSS expressions + + 1.1.2 - 22 January 2009. Fixed bug in parsing of 'font' attributes during tag transformation + + 1.1.1 - 27 September 2008. 
Better nesting correction when omittable closing tags are absent + + 1.1 - 29 June 2008. '$config["hook_tag"]' and '$config["tidy"]' introduced for custom tag/attribute check/modification/injection and output compaction/beautification; fixed a regex-in-$spec parsing bug + + 1.0.9 - 11 June 2008. Fix for a bug in checks for invalid HTML code-point entities + + 1.0.8 - 15 May 2008. Permit 'bordercolor' attribute for 'table', 'td' and 'tr' + + 1.0.7 - 1 May 2008. Support for 'wmode' attribute for 'embed'; '$config["show_setting"]' introduced; improved '$config["elements"]' evaluation + + 1.0.6 - 20 April 2008. '$config["and_mark"]' introduced + + 1.0.5 - 12 March 2008. 'style' URL schemes essentially disallowed when $config 'safe' is on; improved regex for CSS expression search + + 1.0.4 - 10 March 2008. Improved corrections for 'blockquote', 'form', 'map' and 'noscript' + + 1.0.3 - 3 March 2008. Character entities for soft-hyphens are now replaced with spaces (instead of being removed); fix for a bug allowing 'td' directly inside 'table'; '$config["safe"]' introduced + + 1.0.2 - 13 February 2008. Improved implementation of '$config["keep_bad"]' + + 1.0.1 - 7 November 2007. Improved regex for identifying URLs, protocols and dynamic expressions ('hl_tag()' and 'hl_prot()'); no error display with 'hl_regex()' + + 1.0 - 2 November 2007. First release + + +-- 4.4 Testing ----------------------------------------------------o + + + To test htmLawed using a form interface, a demo:- htmLawedTest.php web-page is provided with the htmLawed distribution ('htmLawed.php' and 'htmLawedTest.php' should be in the same directory on the web-server). A file with test-cases:- htmLawed_TESTCASE.txt is also provided. + + +-- 4.5 Upgrade, & old versions ------------------------------------o + + + Upgrading is as simple as replacing the previous version of 'htmLawed.php' (assuming it was not modified for customized features). 
As htmLawed output is almost always used in static documents, upgrading should not affect old, finalized content. + + *Important* The following upgrades may affect the functionality of a specific htmLawed installation: + + (1) From version 1.1-1.1.10 to 1.1.11 (or later), if a 'hook_tag' function is in use: In version 1.1.11, elements in closing tags (and not just the opening tags) are also passed to the function. There are no attribute names/values to pass, so a 'hook_tag' function receives only the element name. The 'hook_tag' function therefore may have to be edited. See section:- #3.4.9. + + Old versions of htmLawed may be available online. E.g., for version 1.0, check http://www.bioinformatics.org/phplabware/downloads/htmLawed1.zip, for 1.1.1, htmLawed111.zip, and for 1.1.10, htmLawed1110.zip. + + +-- 4.6 Comparison with 'HTMLPurifier' -----------------------------o + + + The HTMLPurifier PHP library by Edward Yang is a very good HTML filtering script that uses object oriented PHP code. Compared to htmLawed, it (as of year 2010): + + * does not support PHP versions older than 5.0 (HTMLPurifier dropped PHP 4 support after version 2) + + * is 15-20 times bigger (scores of files totalling more than 750 kb) + + * consumes 10-15 times more RAM memory (just including the HTMLPurifier files without calling the filter requires a few MBs of memory) + + * is expectedly slower + + * does not allow admins to fully allow all valid HTML (because of incomplete HTML support, it always considers elements like 'script' illegal) + + * lacks many of the extra features of htmLawed (like entity conversions and code compaction/beautification) + + * has poor documentation + + However, HTMLPurifier has finer checks for character encodings and attribute values, and can log warnings and errors. Visit the HTMLPurifier website:- http://htmlpurifier.org for updated information. 
+ + +-- 4.7 Use through application plug-ins/modules -------------------o + + + Plug-ins/modules to implement htmLawed in applications such as Drupal and DokuWiki may have been developed. Please check the application websites and the forum on the htmLawed site:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed. + + +-- 4.8 Use in non-PHP applications --------------------------------o + + + Non-PHP applications written in Python, Ruby, etc., may be able to use htmLawed through system calls to the PHP engine. Such code may have been documented on the internet. Also check the forum on the htmLawed site:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed. + + +-- 4.9 Donate -----------------------------------------------------o + + + A donation in any currency and amount to appreciate or support this software can be sent by PayPal:- http://paypal.com to this email address: drpatnaik at yahoo dot com. + + +-- 4.10 Acknowledgements ------------------------------------------o + + + Nicholas Alipaz, Bryan Blakey, Pádraic Brady, Dac Chartrand, Ulf Harnhammer, Gareth Heyes, Klaus Leithoff, Lukasz Pilorz, Shelley Powers, Harro Verton, Edward Yang, and many anonymous users. + + Thank you! + + +== 5 Appendices ==================================================oo + + +-- 5.1 Characters discouraged in XHTML ----------------------------- + + + Characters represented by the following hexadecimal code-points are `not` invalid, even though some validators may issue messages stating otherwise. 
+ + '7f' to '84', '86' to '9f', 'fdd0' to 'fddf', '1fffe', '1ffff', '2fffe', '2ffff', '3fffe', '3ffff', '4fffe', '4ffff', '5fffe', '5ffff', '6fffe', '6ffff', '7fffe', '7ffff', '8fffe', '8ffff', '9fffe', '9ffff', 'afffe', 'affff', 'bfffe', 'bffff', 'cfffe', 'cffff', 'dfffe', 'dffff', 'efffe', 'effff', 'ffffe', 'fffff', '10fffe' and '10ffff' + + +-- 5.2 Valid attribute-element combinations -----------------------o + + + Valid attribute-element combinations as per W3C:- http://www.w3c.org specs. + + * includes deprecated attributes (marked '^'), attributes for the non-standard 'embed' element (marked '*'), and the proprietary 'bordercolor' (marked '~') + * only non-frameset, HTML body elements + * 'name' for 'a' and 'map', and 'lang' are invalid in XHTML 1.1 + * 'target' is valid for 'a' in XHTML 1.1 and higher + * 'xml:space' is only for XHTML 1.1 + + abbr - td, th + accept - form, input + accept-charset - form + accesskey - a, area, button, input, label, legend, textarea + action - form + align - caption^, embed, applet, iframe, img^, input^, object^, legend^, table^, hr^, div^, h1^, h2^, h3^, h4^, h5^, h6^, p^, col, colgroup, tbody, td, tfoot, th, thead, tr + alt - applet, area, img, input + archive - applet, object + axis - td, th + bgcolor - embed, table^, tr^, td^, th^ + border - table, img^, object^ + bordercolor~ - table, td, tr + cellpadding - table + cellspacing - table + char - col, colgroup, tbody, td, tfoot, th, thead, tr + charoff - col, colgroup, tbody, td, tfoot, th, thead, tr + charset - a, script + checked - input + cite - blockquote, q, del, ins + classid - object + clear - br^ + code - applet + codebase - object, applet + codetype - object + color - font + cols - textarea + colspan - td, th + compact - dir, dl^, menu, ol^, ul^ + coords - area, a + data - object + datetime - del, ins + declare - object + defer - script + dir - bdo + disabled - button, input, optgroup, option, select, textarea + enctype - form + face - font + flashvars* - embed + for 
- label + frame - table + frameborder - iframe + headers - td, th + height - embed, iframe, td^, th^, img, object, applet + href - a, area + hreflang - a + hspace - applet, img^, object^ + ismap - img, input + label - option, optgroup + language - script^ + longdesc - img, iframe + marginheight - iframe + marginwidth - iframe + maxlength - input + method - form + model* - embed + multiple - select + name - button, embed, textarea, applet^, select, form^, iframe^, img^, a^, input, object, map^, param + nohref - area + noshade - hr^ + nowrap - td^, th^ + object - applet + onblur - a, area, button, input, label, select, textarea + onchange - input, select, textarea + onfocus - a, area, button, input, label, select, textarea + onreset - form + onselect - input, textarea + onsubmit - form + pluginspage* - embed + pluginurl* - embed + prompt - isindex + readonly - textarea, input + rel - a + rev - a + rows - textarea + rowspan - td, th + rules - table + scope - td, th + scrolling - iframe + selected - option + shape - area, a + size - hr^, font, input, select + span - col, colgroup + src - embed, script, input, iframe, img + standby - object + start - ol^ + summary - table + tabindex - a, area, button, input, object, select, textarea + target - a^, area, form + type - a, embed, object, param, script, input, li^, ol^, ul^, button + usemap - img, input, object + valign - col, colgroup, tbody, td, tfoot, th, thead, tr + value - input, option, param, button, li^ + valuetype - param + vspace - applet, img^, object^ + width - embed, hr^, iframe, img, object, table, td^, th^, applet, col, colgroup, pre^ + wmode - embed + xml:space - pre, script, style + + These are allowed in all but the shown elements: + + class - param, script + dir - applet, bdo, br, iframe, param, script + id - script + lang - applet, br, iframe, param, script + onclick - applet, bdo, br, font, iframe, isindex, param, script + ondblclick - applet, bdo, br, font, iframe, isindex, param, script + onkeydown - 
applet, bdo, br, font, iframe, isindex, param, script + onkeypress - applet, bdo, br, font, iframe, isindex, param, script + onkeyup - applet, bdo, br, font, iframe, isindex, param, script + onmousedown - applet, bdo, br, font, iframe, isindex, param, script + onmousemove - applet, bdo, br, font, iframe, isindex, param, script + onmouseout - applet, bdo, br, font, iframe, isindex, param, script + onmouseover - applet, bdo, br, font, iframe, isindex, param, script + onmouseup - applet, bdo, br, font, iframe, isindex, param, script + style - param, script + title - param, script + xml:lang - applet, br, iframe, param, script + + +-- 5.3 CSS 2.1 properties accepting URLs ------------------------o + + + background + background-image + content + cue-after + cue-before + cursor + list-style + list-style-image + play-during + + +-- 5.4 Microsoft Windows 1252 character replacements --------------o + + + Key: 'd' double, 'l' left, 'q' quote, 'r' right, 's.' single + + Code-point (decimal) - hexadecimal value - replacement entity - represented character + + 127 - 7f - (removed) - (not used) + 128 - 80 - € - euro + 129 - 81 - (removed) - (not used) + 130 - 82 - ‚ - baseline s. q + 131 - 83 - ƒ - florin + 132 - 84 - „ - baseline d q + 133 - 85 - … - ellipsis + 134 - 86 - † - dagger + 135 - 87 - ‡ - d dagger + 136 - 88 - ˆ - circumflex accent + 137 - 89 - ‰ - permile + 138 - 8a - Š - S Hacek + 139 - 8b - ‹ - l s. guillemet + 140 - 8c - Œ - OE ligature + 141 - 8d - (removed) - (not used) + 142 - 8e - Ž - Z dieresis + 143 - 8f - (removed) - (not used) + 144 - 90 - (removed) - (not used) + 145 - 91 - ‘ - l s. q + 146 - 92 - ’ - r s. q + 147 - 93 - “ - l d q + 148 - 94 - ” - r d q + 149 - 95 - • - bullet + 150 - 96 - – - en dash + 151 - 97 - — - em dash + 152 - 98 - ˜ - tilde accent + 153 - 99 - ™ - trademark + 154 - 9a - š - s Hacek + 155 - 9b - › - r s. 
guillemet + 156 - 9c - œ - oe ligature + 157 - 9d - (removed) - (not used) + 158 - 9e - ž - z dieresis + 159 - 9f - Ÿ - Y dieresis + + +-- 5.5 URL format -------------------------------------------------o + + + An `absolute` URL has a 'protocol' or 'scheme', a 'network location' or 'hostname', and, optional 'path', 'parameters', 'query' and 'fragment' segments. Thus, an absolute URL has this generic structure: + + (scheme) : (//network location) /(path) ;(parameters) ?(query) #(fragment) + + The schemes can only contain letters, digits, '+', '.' and '-'. Hostname is the portion after the '//' and up to the first '/' (if any; else, up to the end) when ':' is followed by a '//' (e.g., 'abc.com' in 'ftp://abc.com/def'); otherwise, it consists of everything after the ':' (e.g., 'def@abc.com' in 'mailto:def@abc.com'). + + `Relative` URLs do not have explicit schemes and network locations; such values are inherited from a `base` URL. + + +-- 5.6 Brief on htmLawed code -------------------------------------o + + + Much of the code's logic and reasoning can be understood from the documentation above. + + The *output* of htmLawed is a text string containing the processed input. There is no custom error tracking. + + *Function arguments* for htmLawed are: + + * '$in' - first argument; a text string; the *input text* to be processed. Any extraneous slashes added by PHP when `magic quotes` are enabled should be removed beforehand using PHP's 'stripslashes()' function. + + * '$config' - second argument; an associative array; optional; named '$C' within htmLawed code. The array has keys with names like 'balance' and 'keep_bad', and the values, which can be boolean, string, or array, depending on the key, are read to accordingly set the *configurable parameters* (indicated by the keys). All configurable parameters receive some default value if the value to be used is not specified by the user through '$config'. `Finalized` '$config' is thus a filtered and possibly larger array. 
+ + * '$spec' - third argument; a text string; optional. The string has rules, written in an htmLawed-designated format, *specifying* element-specific attribute and attribute value restrictions. Function 'hl_spec()' is used to convert the string to an associative-array, named '$S' within htmLawed code, for internal use. `Finalized` '$spec' is thus an array. + + `Finalized` '$config' and '$spec' are made *global variables* while htmLawed is at work. Values of any pre-existing global variables with same names are noted, and their values are restored after htmLawed finishes processing the input (to capture the `finalized` values, the 'show_setting' parameter of '$config' should be used). Depending on '$config', another global variable 'hl_Ids', to track 'id' attribute values for uniqueness, may be set. Unlike the other two variables, this one is not reset (or unset) post-processing. + + Except for the main function 'htmLawed()' and the functions 'kses()' and 'kses_hook()', htmLawed's functions are *name-spaced* using the 'hl_' prefix. The *functions* and their roles are: + + * 'hl_attrval' - checking attribute values against $spec + * 'hl_bal' - tag balancing + * 'hl_cmtcd' - handling CDATA sections and HTML comments + * 'hl_ent' - entity handling + * 'hl_prot' - checking a URL scheme/protocol + * 'hl_regex' - checking syntax of a regular expression + * 'hl_spec' - converting user-supplied $spec value to one used by htmLawed internally + * 'hl_tag' - handling tags + * 'hl_tag2' - transforming tags + * 'hl_tidy' - compact/beautify HTML + * 'hl_version' - reporting htmLawed version + * 'htmLawed' - main function + * 'kses' - main function of 'kses' + * 'kses_hook' - hook function of 'kses' + + The last two are for compatibility with pre-existing code using the 'kses' script. htmLawed's 'kses()' basically passes on the filtering task to 'htmLawed()' function after deciphering '$config' and '$spec' from the argument values supplied to it. 
'kses_hook()' is an empty function and is meant for being filled with custom code if the 'kses' script users were using one. + + 'htmLawed()' finalizes '$spec' (with the help of 'hl_spec()') and '$config', and globalizes them. Finalization of '$config' involves setting default values if an inappropriate or invalid one is supplied. This includes calling 'hl_regex()' to check well-formedness of regular expression patterns if such expressions are user-supplied through '$config'. 'htmLawed()' then removes invalid characters like nulls and 'x01' and appropriately handles entities using 'hl_ent()'. HTML comments and CDATA sections are identified and treated as per '$config' with the help of 'hl_cmtcd()'. When retained, the '<' and '>' characters identifying them, and the '<', '>' and '&' characters inside them, are replaced with control characters (code-points '1' to '5') till any tag balancing is completed. + + After this `initial processing` 'htmLawed()' identifies tags using regex and processes them with the help of 'hl_tag()' -- a large function that analyzes tag content, filtering it as per HTML standards, '$config' and '$spec'. Among other things, 'hl_tag()' transforms deprecated elements using 'hl_tag2()', removes attributes from closing tags, checks attribute values as per '$spec' rules using 'hl_attrval()', and checks URL protocols using 'hl_prot()'. 'htmLawed()' performs tag balancing and nesting checks with a call to 'hl_bal()', and optionally compacts/beautifies the output with proper white-spacing with a call to 'hl_tidy()'. The latter temporarily replaces white-space, and '<', '>' and '&' characters inside 'pre', 'script' and 'textarea' elements, and HTML comments and CDATA sections with control characters (code-points '1' to '5', and '7'). + + htmLawed permits the use of custom code or *hook functions* at two stages. 
The first, called inside 'htmLawed()', allows the input text as well as the finalized '$config' and '$spec' values to be altered right after the initial processing (see section:- #3.7). The second is called by 'hl_tag()' once the tag content is finalized (see section:- #3.4.9). + + The functionality of htmLawed is dictated by the external HTML standard. It is thus coded for a clear-cut objective with not much concern for tweakability. The code is only minimally annotated with comments -- it is not meant to instruct; PHP developers familiar with the HTML specifications will see the logic, and others can always refer to the htmLawed documentation. The compact structuring of the statements is meant to aid a quick grasp of the logic. + +___________________________________________________________________oo + + +@@description: htmLawed PHP software is a free, open-source, customizable HTML input purifier and filter +@@encoding: utf-8 +@@keywords: htmLawed, HTM, HTML, HTML Tidy, converter, filter, formatter, purifier, sanitizer, XSS, input, PHP, software, code, script, security, cross-site scripting, hack, sanitize, remove, standards, tags, attributes, elements +@@language: en @@title: htmLawed documentation \ No newline at end of file diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt b/mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt old mode 100755 new mode 100644 index 793a5a6a7..c5cccaaba --- a/mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt +++ b/mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt @@ -1,8 +1,8 @@ /* -htmLawed_TESTCASE.txt, 22 October 2011 -htmLawed 1.1.11, 5 June 2012 +htmLawed_TESTCASE.txt, 27 August 2013 +htmLawed 1.1.16, 29 August 2013 Copyright Santosh Patnaik -Dual licensed with LGPL 3 and GPL 2 or later +Dual licensed with LGPL 3 and GPL 2+ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed */ @@ -27,6 +27,8 @@ character encoding to Unicode/UTF-8 Duplicated: a
    Deprecated: a,

    Casing:
    +Custom: image
    +Data-*: a
    Admin-restricted?:
    Attribute values
    @@ -46,6 +48,11 @@ character encoding to Unicode/UTF-8
    abc
    def

    abc
    def
    ghi

    abc
    def
    ghi
    +
    QQQ
    x

    +
    x
    QQQ

    +
    x
    QQQ
    x

    +
    x
    QQQ

    x


    +
    (try with blockquote parent)
    CDATA sections
    @@ -128,8 +135,9 @@ Disallowed tag p Invalid: a
    Empty: a, a, atext
    Content invalid: 12
    -Content invalid?:

    (try setting 'form' as parent) -Casing: +Content invalid?:

    (try setting 'form' as parent)
    +Casing:
    +Check for tidy:



    hi
    Entities
    @@ -198,6 +206,13 @@ text none t e x t Malformed: , < ![CDATA check ]]>, < ![CDATA check ] ]>
    Invalid: >comment in tag content, +
    HTML5
    + +figure and figcaption:
    picture
    Caption for the awesome picture
    +article:

    A

    B

    C

    E

    F

    G

    +meter:

    Heat 150.

    +datalist: +
    Ins-Del
    (depending on context, these elements can be of either block or inline type)
    @@ -258,6 +273,10 @@ Invalid: >comment in tag content, +Menu:
  • + +
  • +
    Microdata
    @@ -266,6 +285,16 @@ I am X but people call me www.xy.com +
    Microsoft Word
    + +Proprietary tag:

     


    +XML declaration:
    +XML-invalid character code-point (may not replicate):

    “Where is he?” asked both Mary – the one so lovely – and Jane.

    + +
    Nesting
    + +Block or inline a:

    text

    hi

    +
    Non-English text-1
    Inscrieţi-vă acum la a Zecea Conferinţă Internaţională
    @@ -313,6 +342,7 @@ na Alemanha. (aaa) +
    Tables
    Omitted closing tags: @@ -339,6 +369,14 @@ na Alemanha.
    r2c1r2c2

    +
    Tag transformation
    +Font element intended as 'inline' element:

    hi


    +Font element intended as 'block' element:
    hi

    +Font element intended as 'block' element:
    hi
    QQQ

    + +
    Tidy
    +White-space handling: abc def ghi abc def ghi +
    URLs
    Relative and absolute: , , , , , ,
    @@ -364,6 +402,7 @@ src=javascript:al test
    Bad IE7: x
    +Opera: link Bad IE7: xxx
    Bad IE7: xxx
    Bad IE7: xxx
    @@ -393,4 +432,19 @@ script:eval(document.all.mycode.expr)')">hi
    3 < 4
    3 > 4
    - > 3
    \ No newline at end of file + > 3
    +<._.> hi!
    +<<< ALERT >>>
    + some stuff
    +
    +
    +
    +if(13age){say 'teen'}
    +age >51 and a smoking history of >51 pack-years was
    +age > 51 and a smoking history of >51 pack-years was
    +age <51 and a smoking history of <51 pack-years was
    +age < 51 and a smoking history of < 51 pack-years was
    +age >51 and a smoking history of >51 pack-years
    +age > 51 and a smoking history of >51 pack-years
    +age <51 and a smoking history of <51 pack-years
    +age < 51 and a smoking history of < 51 pack-years
    \ No newline at end of file -- cgit v1.2.3 From abc688bc155583a522beb1107e8ee35f98656e84 Mon Sep 17 00:00:00 2001 From: Sem Date: Sun, 10 Nov 2013 09:44:11 +0100 Subject: Replaced README. --- README.md | 28 ++++++++++++++++++++++++++++ README.txt | 24 ------------------------ 2 files changed, 28 insertions(+), 24 deletions(-) create mode 100644 README.md delete mode 100644 README.txt diff --git a/README.md b/README.md new file mode 100644 index 000000000..a1b3e1cbf --- /dev/null +++ b/README.md @@ -0,0 +1,28 @@ +Lorea +===== + +(Re-)Taking the Networks! + +The [lorea](https://lorea.org) code aims at providing individuals and teams with privacy-aware, security-conscious, and user-controlled-data collaborative tools over the Web, and around it. + +The main application is based on [Elgg](http://elgg.org), a PHP-based social networking platform. Lorea extends it with plugins to provide better privacy features, including strong encryption, *OStatus-based federation* with other Lorea/Elgg installations and OStatus-compliant projects, etc. + +It also integrates other popular technologies such as [DokuWiki](http://www.dokuwiki.org), [Etherpad](http://etherpad.org), [XMPP](http://xmpp.org), etc., and provides *GPG-encrypted mailing-lists* to groups. + +### Installation + +Our code is divided in two git repositories: elgg and lorea-plugins. You can get it using the following commands. + +
    +$ git clone git://gitorious.org/lorea/elgg.git
    +$ cd elgg
    +$ git remote add lorea-plugins git://gitorious.org/lorea/lorea-plugins.git
    +$ git pull lorea-plugins master
    +
    + +You can update the code to the latest release using: + +
    +$ git pull origin master
    +$ git pull lorea-plugins master
    +
    diff --git a/README.txt b/README.txt deleted file mode 100644 index dd604fd2b..000000000 --- a/README.txt +++ /dev/null @@ -1,24 +0,0 @@ -Elgg -Copyright (c) 2008-2013 See COPYRIGHT.txt - -See CONTRIBUTORS.txt for development credits. - -Elgg is managed by the Elgg Foundation, a nonprofit organization that was -founded to govern, protect, and promote the Elgg open source social network -engine. The Foundation aims to provide a stable, commercially and -individually independent organization that operates in the best interest of Elgg -as an open source project. - -The project site can be found at http://elgg.org/ - -The Elgg project was started in 2004 by: -Ben Werdmuller and -Dave Tosh - -Elgg is released under the GNU General Public License (GPL) Version 2 and the -Massachusetts Institute of Technology (MIT) License. See LICENSE.txt -in the root of the package you downloaded. - -For installation instructions, see INSTALL.txt. - -For upgrade instructions, see UPGRADE.txt. -- cgit v1.2.3 From ec735cc9f846455041a99dd6e2d183d60f5c476e Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Wed, 20 Nov 2013 23:10:11 -0500 Subject: Fixing permissions changes from #6201 --- mod/htmlawed/vendors/htmLawed/htmLawed.php | 0 mod/htmlawed/vendors/htmLawed/htmLawedTest.php | 0 mod/htmlawed/vendors/htmLawed/htmLawed_README.htm | 0 mod/htmlawed/vendors/htmLawed/htmLawed_README.txt | 0 mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt | 0 5 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 mod/htmlawed/vendors/htmLawed/htmLawed.php mode change 100644 => 100755 mod/htmlawed/vendors/htmLawed/htmLawedTest.php mode change 100644 => 100755 mod/htmlawed/vendors/htmLawed/htmLawed_README.htm mode change 100644 => 100755 mod/htmlawed/vendors/htmLawed/htmLawed_README.txt mode change 100644 => 100755 mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed.php b/mod/htmlawed/vendors/htmLawed/htmLawed.php old mode 100644 
new mode 100755 diff --git a/mod/htmlawed/vendors/htmLawed/htmLawedTest.php b/mod/htmlawed/vendors/htmLawed/htmLawedTest.php old mode 100644 new mode 100755 diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed_README.htm b/mod/htmlawed/vendors/htmLawed/htmLawed_README.htm old mode 100644 new mode 100755 diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed_README.txt b/mod/htmlawed/vendors/htmLawed/htmLawed_README.txt old mode 100644 new mode 100755 diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt b/mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt old mode 100644 new mode 100755 -- cgit v1.2.3 From 55c39ae0980bced8a03739fc25c6d876979d3572 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Sat, 23 Nov 2013 18:35:11 -0500 Subject: Removes border-radius from radios/checkboxes --- views/default/css/elements/forms.php | 1 + 1 file changed, 1 insertion(+) diff --git a/views/default/css/elements/forms.php b/views/default/css/elements/forms.php index f55e57fb4..068cc8fd6 100644 --- a/views/default/css/elements/forms.php +++ b/views/default/css/elements/forms.php @@ -69,6 +69,7 @@ input[type="radio"] { margin:0 3px 0 0; padding:0; border:none; + border-radius:0; width:auto; } .elgg-input-checkboxes.elgg-horizontal li, -- cgit v1.2.3 From 76b7a8f78975098a5cd09f794feb34d4f7a4ee76 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Sun, 1 Dec 2013 21:50:20 -0500 Subject: Make sure new lang key available during upgrade --- engine/lib/upgrades/2013060900-1.8.15-site_secret-404fc165cf9e0ac9.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/engine/lib/upgrades/2013060900-1.8.15-site_secret-404fc165cf9e0ac9.php b/engine/lib/upgrades/2013060900-1.8.15-site_secret-404fc165cf9e0ac9.php index b5b614762..538d74dd6 100644 --- a/engine/lib/upgrades/2013060900-1.8.15-site_secret-404fc165cf9e0ac9.php +++ b/engine/lib/upgrades/2013060900-1.8.15-site_secret-404fc165cf9e0ac9.php @@ -9,5 +9,8 @@ $strength = _elgg_get_site_secret_strength(); if ($strength !== 'strong') { + // a 
new key is needed immediately + register_translations(elgg_get_root_path() . 'languages/'); + elgg_add_admin_notice('weak_site_key', elgg_echo("upgrade:site_secret_warning:$strength")); } -- cgit v1.2.3 From 47f209929b1913a73b8051d35d5545d28a37dba7 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Sun, 1 Dec 2013 21:56:48 -0500 Subject: Code style fixes in ElggCrypto --- engine/classes/ElggCrypto.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/engine/classes/ElggCrypto.php b/engine/classes/ElggCrypto.php index b6a8b2024..317d371e4 100644 --- a/engine/classes/ElggCrypto.php +++ b/engine/classes/ElggCrypto.php @@ -96,12 +96,12 @@ class ElggCrypto { } do { - $bytes = ($total > $hash_len)? $hash_len : $total; + $bytes = ($total > $hash_len) ? $hash_len : $total; $total -= $bytes; //collect any entropy available from the PHP system and filesystem $entropy = rand() . uniqid(mt_rand(), true) . $SSLstr; - $entropy .= implode('', @fstat(@fopen( __FILE__, 'r'))); + $entropy .= implode('', @fstat(@fopen(__FILE__, 'r'))); $entropy .= memory_get_usage() . getmypid(); $entropy .= serialize($_ENV) . serialize($_SERVER); if (function_exists('posix_times')) { @@ -113,7 +113,7 @@ class ElggCrypto { if ($handle) { $entropy .= @fread($handle, $bytes); - } else { + } else { // Measure the time that the operations will take on average for ($i = 0; $i < 3; $i++) { $c1 = microtime(true); @@ -162,9 +162,9 @@ class ElggCrypto { * Uses supplied character list for generating the new string. * If no character list provided - uses Base64 URL character set. * - * @param int $length Desired length of the string - * @param string|null $chars Characters to be chosen from randomly. If not given, the Base64 URL - * charset will be used. + * @param int $length Desired length of the string + * @param string|null $chars Characters to be chosen from randomly. If not given, the Base64 URL + * charset will be used. 
* * @return string The random string * -- cgit v1.2.3 From beab3edd8f0b821b7e90e288add261342505321f Mon Sep 17 00:00:00 2001 From: Ed Lyons Date: Mon, 2 Dec 2013 06:34:08 -0500 Subject: Fix #6238 Return blank arrays I committed this simple change to return the empty array rather than false for empty settings result for ElggPlugin::getAllSettings and getAllUsersSettings. Put this into 1.8 branch. --- engine/classes/ElggPlugin.php | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/engine/classes/ElggPlugin.php b/engine/classes/ElggPlugin.php index 7bf6eb1df..81831d8cd 100644 --- a/engine/classes/ElggPlugin.php +++ b/engine/classes/ElggPlugin.php @@ -299,17 +299,16 @@ class ElggPlugin extends ElggObject { $private_settings = get_data($q); + $return = array(); + if ($private_settings) { - $return = array(); - + foreach ($private_settings as $setting) { $return[$setting->name] = $setting->value; } + } - return $return; - } - - return false; + return $return; } /** @@ -423,9 +422,10 @@ class ElggPlugin extends ElggObject { $private_settings = get_data($q); - if ($private_settings) { - $return = array(); - + $return = array(); + + if ($private_settings) { + foreach ($private_settings as $setting) { $name = substr($setting->name, $ps_prefix_len); $value = $setting->value; @@ -433,10 +433,9 @@ class ElggPlugin extends ElggObject { $return[$name] = $value; } - return $return; } - return false; + return $return; } /** -- cgit v1.2.3 From 908628e622d798c5036f7e25bc5b7d6d36d64754 Mon Sep 17 00:00:00 2001 From: Jerome Bakker Date: Mon, 2 Dec 2013 14:56:20 +0100 Subject: fixes #3143 login part --- engine/lib/memcache.php | 20 ++++++++++++++++++++ engine/lib/sessions.php | 6 ++++++ 2 files changed, 26 insertions(+) diff --git a/engine/lib/memcache.php b/engine/lib/memcache.php index f79fba4a9..79b87e850 100644 --- a/engine/lib/memcache.php +++ b/engine/lib/memcache.php @@ -35,3 +35,23 @@ function is_memcache_available() { return 
$memcache_available; } + +/** + * Invalidate an entity in memcache + * + * @param int $entity_guid The GUID of the entity to invalidate + * + * @return void + * @access private + */ +function _elgg_invalidate_memcache_for_entity($entity_guid) { + static $newentity_cache; + + if ((!$newentity_cache) && (is_memcache_available())) { + $newentity_cache = new ElggMemcache('new_entity_cache'); + } + + if ($newentity_cache) { + $newentity_cache->delete($entity_guid); + } +} \ No newline at end of file diff --git a/engine/lib/sessions.php b/engine/lib/sessions.php index fb28e1e9a..e3d5ce9cd 100644 --- a/engine/lib/sessions.php +++ b/engine/lib/sessions.php @@ -326,6 +326,12 @@ function login(ElggUser $user, $persistent = false) { set_last_login($_SESSION['guid']); reset_login_failure_count($user->guid); // Reset any previous failed login attempts + // if memcache is enabled, invalidate the user in memcache @see https://github.com/Elgg/Elgg/issues/3143 + if (is_memcache_available()) { + // this needs to happen with a shutdown function because of the timing with set_last_login() + register_shutdown_function("_elgg_invalidate_memcache_for_entity", $_SESSION['guid']); + } + return true; } -- cgit v1.2.3 From 63db31c0237d24247b3b75b8365d610be936c283 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Sat, 7 Dec 2013 19:27:18 -0500 Subject: Handle case if get_user_notification_settings() returns false --- engine/lib/notification.php | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/engine/lib/notification.php b/engine/lib/notification.php index b6399b3c6..2506867d5 100644 --- a/engine/lib/notification.php +++ b/engine/lib/notification.php @@ -110,12 +110,15 @@ function notify_user($to, $from, $subject, $message, array $params = NULL, $meth // Are we overriding delivery? 
$methods = $methods_override; if (!$methods) { - $tmp = (array)get_user_notification_settings($guid); + $tmp = get_user_notification_settings($guid); $methods = array(); - foreach ($tmp as $k => $v) { - // Add method if method is turned on for user! - if ($v) { - $methods[] = $k; + // $tmp may be false. don't cast + if (is_array($tmp)) { + foreach ($tmp as $k => $v) { + // Add method if method is turned on for user! + if ($v) { + $methods[] = $k; + } } } } -- cgit v1.2.3 From f4420e017f6dc2039e8e3910e5b98c0eb17a7be2 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Sat, 7 Dec 2013 20:27:32 -0500 Subject: Fixes #6012: Gets correct client IP behind proxy (1.8) --- engine/lib/system_log.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/engine/lib/system_log.php b/engine/lib/system_log.php index 5a153afb2..bed863755 100644 --- a/engine/lib/system_log.php +++ b/engine/lib/system_log.php @@ -187,7 +187,11 @@ function system_log($object, $event) { $object_subtype = $object->getSubtype(); $event = sanitise_string($event); $time = time(); - $ip_address = sanitise_string($_SERVER['REMOTE_ADDR']); + if (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])) { + $ip_address = array_pop(explode(',', $_SERVER['HTTP_X_FORWARDED_FOR'])); + } else { + $ip_address = sanitise_string($_SERVER['REMOTE_ADDR']); + } $performed_by = elgg_get_logged_in_user_guid(); if (isset($object->access_id)) { -- cgit v1.2.3 From 5866c12f7b5cc7c9fa922324a26c419a66fc5ea3 Mon Sep 17 00:00:00 2001 From: Paweł Sroka Date: Sun, 8 Dec 2013 04:13:02 +0100 Subject: Checks X-Real-Ip header as well when determining client IP --- engine/lib/system_log.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/engine/lib/system_log.php b/engine/lib/system_log.php index bed863755..84302632e 100644 --- a/engine/lib/system_log.php +++ b/engine/lib/system_log.php @@ -187,11 +187,16 @@ function system_log($object, $event) { $object_subtype = $object->getSubtype(); $event = 
sanitise_string($event); $time = time(); + if (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])) { $ip_address = array_pop(explode(',', $_SERVER['HTTP_X_FORWARDED_FOR'])); + } elseif (!empty($_SERVER['HTTP_X_REAL_IP'])) { + $ip_address = array_pop(explode(',', $_SERVER['HTTP_X_REAL_IP'])); } else { - $ip_address = sanitise_string($_SERVER['REMOTE_ADDR']); + $ip_address = $_SERVER['REMOTE_ADDR']; } + $ip_address = sanitise_string($ip_address); + $performed_by = elgg_get_logged_in_user_guid(); if (isset($object->access_id)) { -- cgit v1.2.3 From 54d559f8ce84877f4dc2ccf84d00fd5047669d88 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Mon, 9 Dec 2013 09:26:28 -0500 Subject: Code style fixes for recent PR --- engine/classes/ElggPlugin.php | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/engine/classes/ElggPlugin.php b/engine/classes/ElggPlugin.php index 81831d8cd..545b9a53c 100644 --- a/engine/classes/ElggPlugin.php +++ b/engine/classes/ElggPlugin.php @@ -300,13 +300,12 @@ class ElggPlugin extends ElggObject { $private_settings = get_data($q); $return = array(); - + if ($private_settings) { - foreach ($private_settings as $setting) { $return[$setting->name] = $setting->value; } - } + } return $return; } @@ -423,16 +422,14 @@ class ElggPlugin extends ElggObject { $private_settings = get_data($q); $return = array(); - - if ($private_settings) { - + + if ($private_settings) { foreach ($private_settings as $setting) { $name = substr($setting->name, $ps_prefix_len); $value = $setting->value; $return[$name] = $value; } - } return $return; -- cgit v1.2.3 From d1d37b4116338ebfa0871f74776c36a5549a4591 Mon Sep 17 00:00:00 2001 From: Juho Jaakkola Date: Wed, 11 Dec 2013 09:06:42 +0200 Subject: Makes sure all group pages respect the limited_groups setting --- mod/groups/lib/groups.php | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/mod/groups/lib/groups.php b/mod/groups/lib/groups.php index 77d7c09cc..f07ab5dc6 
100644 --- a/mod/groups/lib/groups.php +++ b/mod/groups/lib/groups.php @@ -55,7 +55,7 @@ function groups_handle_all_page() { } $filter = elgg_view('groups/group_sort_menu', array('selected' => $selected_tab)); - + $sidebar = elgg_view('groups/sidebar/find'); $sidebar .= elgg_view('groups/sidebar/featured'); @@ -115,7 +115,9 @@ function groups_handle_owned_page() { } elgg_push_breadcrumb($title); - elgg_register_title_button(); + if (elgg_get_plugin_setting('limited_groups', 'groups') != 'yes' || elgg_is_admin_logged_in()) { + elgg_register_title_button(); + } $content = elgg_list_entities(array( 'type' => 'group', @@ -150,7 +152,9 @@ function groups_handle_mine_page() { } elgg_push_breadcrumb($title); - elgg_register_title_button(); + if (elgg_get_plugin_setting('limited_groups', 'groups') != 'yes' || elgg_is_admin_logged_in()) { + elgg_register_title_button(); + } $content = elgg_list_entities_from_relationship(array( 'type' => 'group', @@ -181,7 +185,7 @@ function groups_handle_mine_page() { */ function groups_handle_edit_page($page, $guid = 0) { gatekeeper(); - + if ($page == 'add') { elgg_set_page_owner_guid(elgg_get_logged_in_user_guid()); $title = elgg_echo('groups:add'); @@ -204,7 +208,7 @@ function groups_handle_edit_page($page, $guid = 0) { $content = elgg_echo('groups:noaccess'); } } - + $params = array( 'content' => $content, 'title' => $title, @@ -266,7 +270,7 @@ function groups_handle_profile_page($guid) { $content = elgg_view('groups/profile/layout', array('entity' => $group)); $sidebar = ''; - if (group_gatekeeper(false)) { + if (group_gatekeeper(false)) { if (elgg_is_active_plugin('search')) { $sidebar .= elgg_view('groups/sidebar/search', array('entity' => $group)); } @@ -275,18 +279,18 @@ function groups_handle_profile_page($guid) { $subscribed = false; if (elgg_is_active_plugin('notifications')) { global $NOTIFICATION_HANDLERS; - + foreach ($NOTIFICATION_HANDLERS as $method => $foo) { $relationship = 
check_entity_relationship(elgg_get_logged_in_user_guid(), 'notify' . $method, $guid); - + if ($relationship) { $subscribed = true; break; } } } - + $sidebar .= elgg_view('groups/sidebar/my_status', array( 'entity' => $group, 'subscribed' => $subscribed @@ -334,7 +338,7 @@ function groups_handle_activity_page($guid) { if (!$content) { $content = '

    ' . elgg_echo('groups:activity:none') . '

    '; } - + $params = array( 'content' => $content, 'title' => $title, @@ -427,7 +431,7 @@ function groups_handle_invite_page($guid) { /** * Manage requests to join a group - * + * * @param int $guid Group entity GUID */ function groups_handle_requests_page($guid) { @@ -443,7 +447,7 @@ function groups_handle_requests_page($guid) { if ($group && $group->canEdit()) { elgg_push_breadcrumb($group->name, $group->getURL()); elgg_push_breadcrumb($title); - + $requests = elgg_get_entities_from_relationship(array( 'type' => 'user', 'relationship' => 'membership_request', -- cgit v1.2.3 From b0a62995706d2b7796ad30c7e0bc6471cb268de0 Mon Sep 17 00:00:00 2001 From: Brett Profitt Date: Wed, 11 Dec 2013 09:28:08 -0500 Subject: Fixed directions for going to the site secret generation page. --- languages/en.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/languages/en.php b/languages/en.php index 4eed63d4c..07407d1e1 100644 --- a/languages/en.php +++ b/languages/en.php @@ -1093,8 +1093,8 @@ Once you have logged in, we highly recommend that you change your password. 'update:twitter_api:deactivated' => 'Twitter API (previously Twitter Service) was deactivated during the upgrade. Please activate it manually if required.', 'update:oauth_api:deactivated' => 'OAuth API (previously OAuth Lib) was deactivated during the upgrade. Please activate it manually if required.', - 'upgrade:site_secret_warning:moderate' => "You are encouraged to regenerate your site key to improve system security. See Configure > Site Secret", - 'upgrade:site_secret_warning:weak' => "You are strongly encouraged to regenerate your site key to improve system security. See Configure > Site Secret", + 'upgrade:site_secret_warning:moderate' => "You are encouraged to regenerate your site key to improve system security. See Configure > Settings > Site Secret", + 'upgrade:site_secret_warning:weak' => "You are strongly encouraged to regenerate your site key to improve system security. 
See Configure > Settings > Site Secret", 'deprecated:function' => '%s() was deprecated by %s()', -- cgit v1.2.3 From e5dd9906d16d97aed7ba3511b7e1132bd4da3761 Mon Sep 17 00:00:00 2001 From: capo Date: Mon, 16 Dec 2013 00:12:26 +0100 Subject: Added feature to be able to use simplepie rss module in the groups --- actions/simplepie/group_module.php | 8 ++ languages/es.php | 16 +++ start.php | 7 ++ views/default/forms/simplepie/group_module.php | 35 +++++++ views/default/simplepie/group_module.php | 113 +++++++++++++++++++++ views/default/widgets/feed_reader/content.php.save | 81 +++++++++++++++ 6 files changed, 260 insertions(+) create mode 100644 actions/simplepie/group_module.php create mode 100644 languages/es.php create mode 100644 views/default/forms/simplepie/group_module.php create mode 100644 views/default/simplepie/group_module.php create mode 100644 views/default/widgets/feed_reader/content.php.save diff --git a/actions/simplepie/group_module.php b/actions/simplepie/group_module.php new file mode 100644 index 000000000..93fe353de --- /dev/null +++ b/actions/simplepie/group_module.php @@ -0,0 +1,8 @@ +canEdit()) { + $group->feed_url = $feed_url; +} diff --git a/languages/es.php b/languages/es.php new file mode 100644 index 000000000..51b53e9f2 --- /dev/null +++ b/languages/es.php @@ -0,0 +1,16 @@ + 'Enlace RSS', + 'simplepie:description' => 'Agregar un blog externo', + 'simplepie:notset' => 'Enlace RSS no configurado', + 'simplepie:notfind' => 'no se encontro el feed. Revisa el feed url.', + 'simplepie:feed_url' => 'Feed URL', + 'simplepie:num_items' => 'Numero de items', + 'simplepie:excerpt' => 'Incluir contenido', + 'simplepie:post_date' => 'Incluir fecha del post', + 'simplepie:postedon' => 'Posted on', +); + +add_translation("en", $english); + diff --git a/start.php b/start.php index 20adc7545..26c2e5017 100644 --- a/start.php +++ b/start.php @@ -20,4 +20,11 @@ function simplepie_init() { $lib = elgg_get_plugins_path() . 
'simplepie/vendors/simplepie.inc'; elgg_register_library('simplepie', $lib); + + // Add group option + add_group_tool_option('rss', elgg_echo('simplepie:enablerss'), false); + elgg_extend_view('groups/tool_latest', 'simplepie/group_module'); + + elgg_register_action('simplepie/group_module', elgg_get_plugins_path() . 'simplepie/actions/simplepie/group_module.php'); } + diff --git a/views/default/forms/simplepie/group_module.php b/views/default/forms/simplepie/group_module.php new file mode 100644 index 000000000..a7c1f6858 --- /dev/null +++ b/views/default/forms/simplepie/group_module.php @@ -0,0 +1,35 @@ + 'feed_url', + 'value' => $vars['entity']->feed_url, +)); + +$group_field = elgg_view('input/hidden', array( + 'name' => 'group_guid', + 'value' => $vars['entity']->guid, +)); + +$save_button = elgg_view('input/submit', array( + 'value' => elgg_echo('save'), +)); + + + + +echo << + $url_label + $url_textbox + $group_field + +
    + $save_button +
    +HTML; diff --git a/views/default/simplepie/group_module.php b/views/default/simplepie/group_module.php new file mode 100644 index 000000000..7d0a7bc2f --- /dev/null +++ b/views/default/simplepie/group_module.php @@ -0,0 +1,113 @@ +rss_enable != "yes") { + return true; +} + +elgg_push_context('widgets'); + +$allowed_tags = '