diff options
author | Sem <sembrestels@riseup.net> | 2012-07-01 17:25:25 +0200 |
---|---|---|
committer | Sem <sembrestels@riseup.net> | 2012-07-01 17:25:25 +0200 |
commit | 2a616ab95e64154175ff3a0008728197406653e3 (patch) | |
tree | 17955dcdd48cba8250d8d68c78a4f646eadfa7a8 /mod/htmlawed/vendors/htmLawed | |
parent | d547dd1136ba7142e62f95398fb8af69d0495334 (diff) | |
download | elgg-2a616ab95e64154175ff3a0008728197406653e3.tar.gz elgg-2a616ab95e64154175ff3a0008728197406653e3.tar.bz2 |
Fixes #4609. Ugraded htmlawed lib.
Diffstat (limited to 'mod/htmlawed/vendors/htmLawed')
-rwxr-xr-x[-rw-r--r--] | mod/htmlawed/vendors/htmLawed/htmLawed.php | 54 | ||||
-rwxr-xr-x[-rw-r--r--] | mod/htmlawed/vendors/htmLawed/htmLawedTest.php | 29 | ||||
-rw-r--r-- | mod/htmlawed/vendors/htmLawed/htmLawed_README.htm | 198 | ||||
-rwxr-xr-x[-rw-r--r--] | mod/htmlawed/vendors/htmLawed/htmLawed_README.txt | 84 | ||||
-rwxr-xr-x[-rw-r--r--] | mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt | 41 |
5 files changed, 290 insertions, 116 deletions
diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed.php b/mod/htmlawed/vendors/htmLawed/htmLawed.php index 2556fdcf2..0d9624961 100644..100755 --- a/mod/htmlawed/vendors/htmLawed/htmLawed.php +++ b/mod/htmlawed/vendors/htmLawed/htmLawed.php @@ -1,9 +1,9 @@ <?php /* -htmLawed 1.1.9, 22 December 2009 +htmLawed 1.1.11, 5 June 2012 Copyright Santosh Patnaik -GPL v3 license +Dual licensed with LGPL 3 and GPL 2 or later A PHP Labware internal utility; www.bioinformatics.org/phplabware/internal_utilities/htmLawed See htmLawed_README.txt/htm @@ -51,7 +51,7 @@ foreach(explode(';', str_replace(array(' ', "\t", "\r", "\n"), '', $x)) as $v){ if($x2){$C['schemes'][$x] = array_flip(explode(',', $x2));} } if(!isset($C['schemes']['*'])){$C['schemes']['*'] = array('file'=>1, 'http'=>1, 'https'=>1,);} -if(!empty($C['safe']) && empty($C['schemes']['style'])){$C['schemes']['style'] = array('nil'=>1);} +if(!empty($C['safe']) && empty($C['schemes']['style'])){$C['schemes']['style'] = array('!'=>1);} $C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0; if(!isset($C['base_url']) or !preg_match('`^[a-zA-Z\d.+\-]+://[^/]+/(.+?/)?$`', $C['base_url'])){ $C['base_url'] = $C['abs_url'] = 0; @@ -65,6 +65,7 @@ $C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0); $C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char']; $C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0); $C['css_expression'] = empty($C['css_expression']) ? 0 : 1; +$C['direct_list_nest'] = empty($C['direct_list_nest']) ? 0 : 1; $C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1; $C['hook'] = (!empty($C['hook']) && function_exists($C['hook'])) ? $C['hook'] : 0; $C['hook_tag'] = (!empty($C['hook_tag']) && function_exists($C['hook_tag'])) ? $C['hook_tag'] : 0; @@ -149,14 +150,15 @@ $cI = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'b'=>1, 'bdo'=>1, 'bi $cN = array('a'=>array('a'=>1), 'button'=>array('a'=>1, 'button'=>1, 'fieldset'=>1, 'form'=>1, 'iframe'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'fieldset'=>array('fieldset'=>1), 'form'=>array('form'=>1), 'label'=>array('label'=>1), 'noscript'=>array('script'=>1), 'pre'=>array('big'=>1, 'font'=>1, 'img'=>1, 'object'=>1, 'script'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1), 'rb'=>array('ruby'=>1), 'rt'=>array('ruby'=>1)); // Illegal $cN2 = array_keys($cN); $cR = array('blockquote'=>1, 'dir'=>1, 'dl'=>1, 'form'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1); -$cS = array('colgroup'=>array('col'=>1), 'dir'=>array('li'), 'dl'=>array('dd'=>1, 'dt'=>1), 'menu'=>array('li'=>1), 'ol'=>array('li'=>1), 'optgroup'=>array('option'=>1), 'option'=>array('#pcdata'=>1), 'rbc'=>array('rb'=>1), 'rp'=>array('#pcdata'=>1), 'rtc'=>array('rt'=>1), 'ruby'=>array('rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1), 'select'=>array('optgroup'=>1, 'option'=>1), 'script'=>array('#pcdata'=>1), 'table'=>array('caption'=>1, 'col'=>1, 'colgroup'=>1, 'tfoot'=>1, 'tbody'=>1, 'tr'=>1, 'thead'=>1), 'tbody'=>array('tr'=>1), 'tfoot'=>array('tr'=>1), 'textarea'=>array('#pcdata'=>1), 'thead'=>array('tr'=>1), 'tr'=>array('td'=>1, 'th'=>1), 'ul'=>array('li'=>1)); // Specific - immediate parent-child +$cS = array('colgroup'=>array('col'=>1), 'dir'=>array('li'=>1), 'dl'=>array('dd'=>1, 'dt'=>1), 'menu'=>array('li'=>1), 'ol'=>array('li'=>1), 'optgroup'=>array('option'=>1), 'option'=>array('#pcdata'=>1), 'rbc'=>array('rb'=>1), 'rp'=>array('#pcdata'=>1), 'rtc'=>array('rt'=>1), 'ruby'=>array('rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1), 'select'=>array('optgroup'=>1, 'option'=>1), 'script'=>array('#pcdata'=>1), 'table'=>array('caption'=>1, 'col'=>1, 'colgroup'=>1, 'tfoot'=>1, 'tbody'=>1, 'tr'=>1, 'thead'=>1), 'tbody'=>array('tr'=>1), 'tfoot'=>array('tr'=>1), 'textarea'=>array('#pcdata'=>1), 'thead'=>array('tr'=>1), 'tr'=>array('td'=>1, 'th'=>1), 'ul'=>array('li'=>1)); // Specific - immediate parent-child +if($GLOBALS['C']['direct_list_nest']){$cS['ol'] = $cS['ul'] += array('ol'=>1, 'ul'=>1);} $cO = array('address'=>array('p'=>1), 'applet'=>array('param'=>1), 'blockquote'=>array('script'=>1), 'fieldset'=>array('legend'=>1, '#pcdata'=>1), 'form'=>array('script'=>1), 'map'=>array('area'=>1), 'object'=>array('param'=>1, 'embed'=>1)); // Other $cT = array('colgroup'=>1, 'dd'=>1, 'dt'=>1, 'li'=>1, 'option'=>1, 'p'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1); // Omitable closing // block/inline type; ins & del both type; #pcdata: text $eB = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'del'=>1, 'dir'=>1, 'dl'=>1, 'div'=>1, 'fieldset'=>1, 'form'=>1, 'ins'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'isindex'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'table'=>1, 'ul'=>1); -$eI = array('#pcdata'=>1, 'a'=>1, 'abbr'=>1, 'acronym'=>1, 'applet'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'br'=>1, 'button'=>1, 'cite'=>1, 'code'=>1, 'del'=>1, 'dfn'=>1, 'em'=>1, 'embed'=>1, 'font'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'kbd'=>1, 'label'=>1, 'map'=>1, 'object'=>1, 'param'=>1, 'q'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'select'=>1, 'script'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1, 'tt'=>1, 'u'=>1, 'var'=>1); +$eI = array('#pcdata'=>1, 'a'=>1, 'abbr'=>1, 'acronym'=>1, 'applet'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'br'=>1, 'button'=>1, 'cite'=>1, 'code'=>1, 'del'=>1, 'dfn'=>1, 'em'=>1, 'embed'=>1, 'font'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'kbd'=>1, 'label'=>1, 'map'=>1, 'object'=>1, 'q'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'select'=>1, 'script'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1, 'tt'=>1, 'u'=>1, 'var'=>1); $eN = array('a'=>1, 'big'=>1, 'button'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'label'=>1, 'object'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1); // Exclude from specific ele; $cN values -$eO = array('area'=>1, 'caption'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'dt'=>1, 'legend'=>1, 'li'=>1, 'optgroup'=>1, 'option'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'script'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'thead'=>1, 'th'=>1, 'tr'=>1); // Missing in $eB & $eI +$eO = array('area'=>1, 'caption'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'dt'=>1, 'legend'=>1, 'li'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'script'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'thead'=>1, 'th'=>1, 'tr'=>1); // Missing in $eB & $eI $eF = $eB + $eI; // $in sets allowed child @@ -295,20 +297,14 @@ function hl_cmtcd($t){ // comment/CDATA sec handler $t = $t[0]; global $C; -if($t[3] == '-'){ - if(!$C['comment']){return $t;} - if($C['comment'] == 1){return '';} +if(!($v = $C[$n = $t[3] == '-' ? 'comment' : 'cdata'])){return $t;} +if($v == 1){return '';} +if($n == 'comment'){ if(substr(($t = preg_replace('`--+`', '-', substr($t, 4, -3))), -1) != ' '){$t .= ' ';} - $t = $C['comment'] == 2 ? str_replace(array('&', '<', '>'), array('&', '<', '>'), $t) : $t; - $t = "\x01\x02\x04!--$t--\x05\x02\x01"; -}else{ // CDATA - if(!$C['cdata']){return $t;} - if($C['cdata'] == 1){return '';} - $t = substr($t, 1, -1); - $t = $C['cdata'] == 2 ? str_replace(array('&', '<', '>'), array('&', '<', '>'), $t) : $t; - $t = "\x01\x01\x04$t\x05\x01\x01"; -} -return str_replace(array('&', '<', '>'), array("\x03", "\x04", "\x05"), $t); +} +else{$t = substr($t, 1, -1);} +$t = $v == 2 ? str_replace(array('&', '<', '>'), array('&', '<', '>'), $t) : $t; +return str_replace(array('&', '<', '>'), array("\x03", "\x04", "\x05"), ($n == 'comment' ? "\x01\x02\x04!--$t--\x05\x02\x01" : "\x01\x01\x04$t\x05\x01\x01")); // eof } @@ -334,9 +330,11 @@ global $C; $b = $a = ''; if($c == null){$c = 'style'; $b = $p[1]; $a = $p[3]; $p = trim($p[2]);} $c = isset($C['schemes'][$c]) ? $C['schemes'][$c] : $C['schemes']['*']; -if(isset($c['*']) or !strcspn($p, '#?;')){return "{$b}{$p}{$a}";} // All ok, frag, query, param +static $d = 'denied:'; +if(isset($c['!']) && substr($p, 0, 7) != $d){$p = "$d$p";} +if(isset($c['*']) or !strcspn($p, '#?;') or (substr($p, 0, 7) == $d)){return "{$b}{$p}{$a}";} // All ok, frag, query, param if(preg_match('`^([a-z\d\-+.&#; ]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $p, $m) && !isset($c[strtolower($m[1])])){ // Denied prot - return "{$b}denied:{$p}{$a}"; + return "{$b}{$d}{$p}{$a}"; } if($C['abs_url']){ if($C['abs_url'] == -1 && strpos($p, $C['base_url']) === 0){ // Make url rel @@ -429,11 +427,11 @@ if($C['make_tag_strict'] && isset($eD[$e])){ // close tag static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty ele if(!empty($m[1])){ - return (!isset($eE[$e]) ? "</$e>" : (($C['keep_bad'])%2 ? str_replace(array('<', '>'), array('<', '>'), $t) : '')); + return (!isset($eE[$e]) ? (empty($C['hook_tag']) ? "</$e>" : $C['hook_tag']($e)) : (($C['keep_bad'])%2 ? str_replace(array('<', '>'), array('<', '>'), $t) : '')); } // open tag & attr -static $aN = array('abbr'=>array('td'=>1, 'th'=>1), 'accept-charset'=>array('form'=>1), 'accept'=>array('form'=>1, 'input'=>1), 'accesskey'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'legend'=>1, 'textarea'=>1), 'action'=>array('form'=>1), 'align'=>array('caption'=>1, 'embed'=>1, 'applet'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'legend'=>1, 'table'=>1, 'hr'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'p'=>1, 'col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'alt'=>array('applet'=>1, 'area'=>1, 'img'=>1, 'input'=>1), 'archive'=>array('applet'=>1, 'object'=>1), 'axis'=>array('td'=>1, 'th'=>1), 'bgcolor'=>array('embed'=>1, 'table'=>1, 'tr'=>1, 'td'=>1, 'th'=>1), 'border'=>array('table'=>1, 'img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'cellpadding'=>array('table'=>1), 'cellspacing'=>array('table'=>1), 'char'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charoff'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charset'=>array('a'=>1, 'script'=>1), 'checked'=>array('input'=>1), 'cite'=>array('blockquote'=>1, 'q'=>1, 'del'=>1, 'ins'=>1), 'classid'=>array('object'=>1), 'clear'=>array('br'=>1), 'code'=>array('applet'=>1), 'codebase'=>array('object'=>1, 'applet'=>1), 'codetype'=>array('object'=>1), 'color'=>array('font'=>1), 'cols'=>array('textarea'=>1), 'colspan'=>array('td'=>1, 'th'=>1), 'compact'=>array('dir'=>1, 'dl'=>1, 'menu'=>1, 'ol'=>1, 'ul'=>1), 'coords'=>array('area'=>1, 'a'=>1), 'data'=>array('object'=>1), 'datetime'=>array('del'=>1, 'ins'=>1), 'declare'=>array('object'=>1), 'defer'=>array('script'=>1), 'dir'=>array('bdo'=>1), 'disabled'=>array('button'=>1, 'input'=>1, 'optgroup'=>1, 'option'=>1, 'select'=>1, 'textarea'=>1), 'enctype'=>array('form'=>1), 'face'=>array('font'=>1), 'for'=>array('label'=>1), 'frame'=>array('table'=>1), 'frameborder'=>array('iframe'=>1), 'headers'=>array('td'=>1, 'th'=>1), 'height'=>array('embed'=>1, 'iframe'=>1, 'td'=>1, 'th'=>1, 'img'=>1, 'object'=>1, 'applet'=>1), 'href'=>array('a'=>1, 'area'=>1), 'hreflang'=>array('a'=>1), 'hspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'ismap'=>array('img'=>1, 'input'=>1), 'label'=>array('option'=>1, 'optgroup'=>1), 'language'=>array('script'=>1), 'longdesc'=>array('img'=>1, 'iframe'=>1), 'marginheight'=>array('iframe'=>1), 'marginwidth'=>array('iframe'=>1), 'maxlength'=>array('input'=>1), 'method'=>array('form'=>1), 'model'=>array('embed'=>1), 'multiple'=>array('select'=>1), 'name'=>array('button'=>1, 'embed'=>1, 'textarea'=>1, 'applet'=>1, 'select'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'a'=>1, 'input'=>1, 'object'=>1, 'map'=>1, 'param'=>1), 'nohref'=>array('area'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'object'=>array('applet'=>1), 'onblur'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onchange'=>array('input'=>1, 'select'=>1, 'textarea'=>1), 'onfocus'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onreset'=>array('form'=>1), 'onselect'=>array('input'=>1, 'textarea'=>1), 'onsubmit'=>array('form'=>1), 'pluginspage'=>array('embed'=>1), 'pluginurl'=>array('embed'=>1), 'prompt'=>array('isindex'=>1), 'readonly'=>array('textarea'=>1, 'input'=>1), 'rel'=>array('a'=>1), 'rev'=>array('a'=>1), 'rows'=>array('textarea'=>1), 'rowspan'=>array('td'=>1, 'th'=>1), 'rules'=>array('table'=>1), 'scope'=>array('td'=>1, 'th'=>1), 'scrolling'=>array('iframe'=>1), 'selected'=>array('option'=>1), 'shape'=>array('area'=>1, 'a'=>1), 'size'=>array('hr'=>1, 'font'=>1, 'input'=>1, 'select'=>1), 'span'=>array('col'=>1, 'colgroup'=>1), 'src'=>array('embed'=>1, 'script'=>1, 'input'=>1, 'iframe'=>1, 'img'=>1), 'standby'=>array('object'=>1), 'start'=>array('ol'=>1), 'summary'=>array('table'=>1), 'tabindex'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'object'=>1, 'select'=>1, 'textarea'=>1), 'target'=>array('a'=>1, 'area'=>1, 'form'=>1), 'type'=>array('a'=>1, 'embed'=>1, 'object'=>1, 'param'=>1, 'script'=>1, 'input'=>1, 'li'=>1, 'ol'=>1, 'ul'=>1, 'button'=>1), 'usemap'=>array('img'=>1, 'input'=>1, 'object'=>1), 'valign'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'value'=>array('input'=>1, 'option'=>1, 'param'=>1, 'button'=>1, 'li'=>1), 'valuetype'=>array('param'=>1), 'vspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'width'=>array('embed'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'object'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'applet'=>1, 'col'=>1, 'colgroup'=>1, 'pre'=>1), 'wmode'=>array('embed'=>1), 'xml:space'=>array('pre'=>1, 'script'=>1, 'style'=>1)); // Ele-specific +static $aN = array('abbr'=>array('td'=>1, 'th'=>1), 'accept-charset'=>array('form'=>1), 'accept'=>array('form'=>1, 'input'=>1), 'accesskey'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'legend'=>1, 'textarea'=>1), 'action'=>array('form'=>1), 'align'=>array('caption'=>1, 'embed'=>1, 'applet'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'legend'=>1, 'table'=>1, 'hr'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'p'=>1, 'col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'alt'=>array('applet'=>1, 'area'=>1, 'img'=>1, 'input'=>1), 'archive'=>array('applet'=>1, 'object'=>1), 'axis'=>array('td'=>1, 'th'=>1), 'bgcolor'=>array('embed'=>1, 'table'=>1, 'tr'=>1, 'td'=>1, 'th'=>1), 'border'=>array('table'=>1, 'img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'cellpadding'=>array('table'=>1), 'cellspacing'=>array('table'=>1), 'char'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charoff'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charset'=>array('a'=>1, 'script'=>1), 'checked'=>array('input'=>1), 'cite'=>array('blockquote'=>1, 'q'=>1, 'del'=>1, 'ins'=>1), 'classid'=>array('object'=>1), 'clear'=>array('br'=>1), 'code'=>array('applet'=>1), 'codebase'=>array('object'=>1, 'applet'=>1), 'codetype'=>array('object'=>1), 'color'=>array('font'=>1), 'cols'=>array('textarea'=>1), 'colspan'=>array('td'=>1, 'th'=>1), 'compact'=>array('dir'=>1, 'dl'=>1, 'menu'=>1, 'ol'=>1, 'ul'=>1), 'coords'=>array('area'=>1, 'a'=>1), 'data'=>array('object'=>1), 'datetime'=>array('del'=>1, 'ins'=>1), 'declare'=>array('object'=>1), 'defer'=>array('script'=>1), 'dir'=>array('bdo'=>1), 'disabled'=>array('button'=>1, 'input'=>1, 'optgroup'=>1, 'option'=>1, 'select'=>1, 'textarea'=>1), 'enctype'=>array('form'=>1), 'face'=>array('font'=>1), 'flashvars'=>array('embed'=>1), 'for'=>array('label'=>1), 'frame'=>array('table'=>1), 'frameborder'=>array('iframe'=>1), 'headers'=>array('td'=>1, 'th'=>1), 'height'=>array('embed'=>1, 'iframe'=>1, 'td'=>1, 'th'=>1, 'img'=>1, 'object'=>1, 'applet'=>1), 'href'=>array('a'=>1, 'area'=>1), 'hreflang'=>array('a'=>1), 'hspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'ismap'=>array('img'=>1, 'input'=>1), 'label'=>array('option'=>1, 'optgroup'=>1), 'language'=>array('script'=>1), 'longdesc'=>array('img'=>1, 'iframe'=>1), 'marginheight'=>array('iframe'=>1), 'marginwidth'=>array('iframe'=>1), 'maxlength'=>array('input'=>1), 'method'=>array('form'=>1), 'model'=>array('embed'=>1), 'multiple'=>array('select'=>1), 'name'=>array('button'=>1, 'embed'=>1, 'textarea'=>1, 'applet'=>1, 'select'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'a'=>1, 'input'=>1, 'object'=>1, 'map'=>1, 'param'=>1), 'nohref'=>array('area'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'object'=>array('applet'=>1), 'onblur'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onchange'=>array('input'=>1, 'select'=>1, 'textarea'=>1), 'onfocus'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onreset'=>array('form'=>1), 'onselect'=>array('input'=>1, 'textarea'=>1), 'onsubmit'=>array('form'=>1), 'pluginspage'=>array('embed'=>1), 'pluginurl'=>array('embed'=>1), 'prompt'=>array('isindex'=>1), 'readonly'=>array('textarea'=>1, 'input'=>1), 'rel'=>array('a'=>1), 'rev'=>array('a'=>1), 'rows'=>array('textarea'=>1), 'rowspan'=>array('td'=>1, 'th'=>1), 'rules'=>array('table'=>1), 'scope'=>array('td'=>1, 'th'=>1), 'scrolling'=>array('iframe'=>1), 'selected'=>array('option'=>1), 'shape'=>array('area'=>1, 'a'=>1), 'size'=>array('hr'=>1, 'font'=>1, 'input'=>1, 'select'=>1), 'span'=>array('col'=>1, 'colgroup'=>1), 'src'=>array('embed'=>1, 'script'=>1, 'input'=>1, 'iframe'=>1, 'img'=>1), 'standby'=>array('object'=>1), 'start'=>array('ol'=>1), 'summary'=>array('table'=>1), 'tabindex'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'object'=>1, 'select'=>1, 'textarea'=>1), 'target'=>array('a'=>1, 'area'=>1, 'form'=>1), 'type'=>array('a'=>1, 'embed'=>1, 'object'=>1, 'param'=>1, 'script'=>1, 'input'=>1, 'li'=>1, 'ol'=>1, 'ul'=>1, 'button'=>1), 'usemap'=>array('img'=>1, 'input'=>1, 'object'=>1), 'valign'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'value'=>array('input'=>1, 'option'=>1, 'param'=>1, 'button'=>1, 'li'=>1), 'valuetype'=>array('param'=>1), 'vspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'width'=>array('embed'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'object'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'applet'=>1, 'col'=>1, 'colgroup'=>1, 'pre'=>1), 'wmode'=>array('embed'=>1), 'xml:space'=>array('pre'=>1, 'script'=>1, 'style'=>1)); // Ele-specific static $aNE = array('checked'=>1, 'compact'=>1, 'declare'=>1, 'defer'=>1, 'disabled'=>1, 'ismap'=>1, 'multiple'=>1, 'nohref'=>1, 'noresize'=>1, 'noshade'=>1, 'nowrap'=>1, 'readonly'=>1, 'selected'=>1); // Empty static $aNP = array('action'=>1, 'cite'=>1, 'classid'=>1, 'codebase'=>1, 'data'=>1, 'href'=>1, 'longdesc'=>1, 'model'=>1, 'pluginspage'=>1, 'pluginurl'=>1, 'usemap'=>1); // Need scheme check; excludes style, on* & src static $aNU = array('class'=>array('param'=>1, 'script'=>1), 'dir'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'id'=>array('script'=>1), 'lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'xml:lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'onclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'ondblclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeydown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeypress'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeyup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousedown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousemove'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseout'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseover'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'style'=>array('param'=>1, 'script'=>1), 'title'=>array('param'=>1, 'script'=>1)); // Univ & exceptions @@ -472,8 +470,8 @@ while(strlen($a)){ $aA[$nm] = ''; } break; case 2: // Val - if(preg_match('`^"[^"]*"`', $a, $m) or preg_match("`^'[^']*'`", $a, $m) or preg_match("`^\s*[^\s\"']+`", $a, $m)){ - $m = $m[0]; $w = 1; $mode = 0; $a = ltrim(substr_replace($a, '', 0, strlen($m))); + if(preg_match('`^((?:"[^"]*")|(?:\'[^\']*\')|(?:\s*[^\s"\']+))(.*)`', $a, $m)){ + $a = ltrim($m[2]); $m = $m[1]; $w = 1; $mode = 0; $aA[$nm] = trim(($m[0] == '"' or $m[0] == '\'') ? substr($m, 1, -1) : $m); } break; @@ -500,7 +498,7 @@ foreach($aA as $k=>$v){ static $sC = array(' '=>' ', ' '=>' ', 'E'=>'e', 'E'=>'e', 'e'=>'e', 'e'=>'e', 'X'=>'x', 'X'=>'x', 'x'=>'x', 'x'=>'x', 'P'=>'p', 'P'=>'p', 'p'=>'p', 'p'=>'p', 'S'=>'s', 'S'=>'s', 's'=>'s', 's'=>'s', 'I'=>'i', 'I'=>'i', 'i'=>'i', 'i'=>'i', 'O'=>'o', 'O'=>'o', 'o'=>'o', 'o'=>'o', 'N'=>'n', 'N'=>'n', 'n'=>'n', 'n'=>'n', 'U'=>'u', 'U'=>'u', 'u'=>'u', 'u'=>'u', 'R'=>'r', 'R'=>'r', 'r'=>'r', 'r'=>'r', 'L'=>'l', 'L'=>'l', 'l'=>'l', 'l'=>'l', '('=>'(', '('=>'(', ')'=>')', ')'=>')', ' '=>':', ' '=>':', '"'=>'"', '"'=>'"', '''=>"'", '''=>"'", '/'=>'/', '/'=>'/', '*'=>'*', '*'=>'*', '\'=>'\\', '\'=>'\\'); $v = strtr($v, $sC); } - $v = preg_replace_callback('`(url(?:\()(?: )*(?:\'|"|&(?:quot|apos);)?)(.+)((?:\'|"|&(?:quot|apos);)?(?: )*(?:\)))`iS', 'hl_prot', $v); + $v = preg_replace_callback('`(url(?:\()(?: )*(?:\'|"|&(?:quot|apos);)?)(.+?)((?:\'|"|&(?:quot|apos);)?(?: )*(?:\)))`iS', 'hl_prot', $v); $v = !$C['css_expression'] ? preg_replace('`expression`i', ' ', preg_replace('`\\\\\S|(/|(%2f))(\*|(%2a))`i', ' ', $v)) : $v; }elseif(isset($aNP[$k]) or strpos($k, 'src') !== false or $k[0] == 'o'){ $v = str_replace("\xad", ' ', (strpos($v, '&') !== false ? str_replace(array('­', '­', '­'), ' ', $v) : $v)); @@ -643,7 +641,7 @@ return ''; function hl_tidy($t, $w, $p){ // Tidy/compact HTM if(strpos(' pre,script,textarea', "$p,")){return $t;} -$t = str_replace(' </', '</', preg_replace(array('`(<\w[^>]*(?<!/)>)\s+`', '`\s+`', '`(<\w[^>]*(?<!/)>) `'), array(' $1', ' ', '$1'), preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea).*?>)(.+?)(</\2>)`sm'), create_function('$m', 'return $m[1]. str_replace(array("<", ">", "\n", "\r", "\t", " "), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). $m[4];'), $t))); +$t = str_replace(' </', '</', preg_replace(array('`(<\w[^>]*(?<!/)>)\s+`', '`\s+`', '`(<\w[^>]*(?<!/)>) `'), array(' $1', ' ', '$1'), preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)(</\2>)`sm'), create_function('$m', 'return $m[1]. str_replace(array("<", ">", "\n", "\r", "\t", " "), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). $m[4];'), $t))); if(($w = strtolower($w)) == -1){ return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t); } @@ -688,7 +686,7 @@ return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array( function hl_version(){ // rel -return '1.1.9'; +return '1.1.11'; // eof } diff --git a/mod/htmlawed/vendors/htmLawed/htmLawedTest.php b/mod/htmlawed/vendors/htmLawed/htmLawedTest.php index 160bd012d..806aa4641 100644..100755 --- a/mod/htmlawed/vendors/htmLawed/htmLawedTest.php +++ b/mod/htmlawed/vendors/htmLawed/htmLawedTest.php @@ -1,10 +1,10 @@ <?php /* -htmLawedTest.php, 16 July 2009 -htmLawed 1.1.9, 22 December 2009 +htmLawedTest.php, 22 October 2011 +htmLawed 1.1.11, 5 June 2012 Copyright Santosh Patnaik -GPL v3 license +Dual licensed with LGPL 3 and GPL 2 or later A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed Test htmLawed; user provides text input; input and processed input are shown as highlighted code and rendered HTML; also shown are execution time and peak memory usage @@ -44,7 +44,9 @@ if(get_magic_quotes_gpc()){ } ini_set('magic_quotes_gpc', 0); } -set_magic_quotes_runtime(0); +if(get_magic_quotes_runtime()){ + set_magic_quotes_runtime(0); +} $_POST['enc'] = (isset($_POST['enc']) and preg_match('`^[-\w]+$`', $_POST['enc'])) ? $_POST['enc'] : 'utf-8'; @@ -328,7 +330,7 @@ tRs = { a.appendChild(document.createTextNode("\u2195")); a.style.cursor = 'n-resize'; a.className= 'resizer'; - a.title = 'click-drag to resize' + a.title = 'click-drag to resize textarea' tRs.adEv(a, 'mousedown', tRs.initResize); textareas[i].parentNode.appendChild(a); } @@ -420,7 +422,7 @@ else{ } ?> -<span style="float:right;" class="help"><span style="font-size: 85%;">Encoding: </span><input type="text" size="8" id="enc" name="enc" style="vertical-align: middle;" value="<?php echo htmlspecialchars($_POST['enc']); ?>" title="IANA-recognized name of the input character-set; can be multiple ;- or space-separated values; may not work in some browsers" /></span> +<span style="float:right;" class="help" title="IANA-recognized name of the input character-set; can be multiple ;- or space-separated values; may not work in some browsers"><span style="font-size: 85%;">Encoding: </span><input type="text" size="8" id="enc" name="enc" style="vertical-align: middle;" value="<?php echo htmlspecialchars($_POST['enc']); ?>" /></span> </div> <br style="clear:both;" /> @@ -454,6 +456,7 @@ $cfg = array( 'comment'=>array('4', 'nil', 'allow HTML comments', 'nil'), 'css_expression'=>array('2', 'nil', 'allow dynamic expressions in CSS style properties', 'nil'), 'deny_attribute'=>array('1', '0', 'denied attributes', '0', '50', '', 'these'), +'direct_list_nest'=>array('2', 'nil', 'allow direct nesting of a list within another without requiring it to be a list item', 'nil'), 'elements'=>array('', '', 'allowed elements', '50'), 'hexdec_entity'=>array('3', '1', 'convert hexadecimal numeric entities to decimal ones, or vice versa', '0'), 'hook'=>array('', '', 'name of hook function', '25'), @@ -516,23 +519,23 @@ if($do){ } } - if($cfg['anti_link_spam'] && (!empty($cfg['anti_link_spam11']) or !empty($cfg['anti_link_spam12']))){ + if(isset($cfg['anti_link_spam']) && $cfg['anti_link_spam'] && (!empty($cfg['anti_link_spam11']) or !empty($cfg['anti_link_spam12']))){ $cfg['anti_link_spam'] = array($cfg['anti_link_spam11'], $cfg['anti_link_spam12']); } unset($cfg['anti_link_spam11'], $cfg['anti_link_spam12']); - if($cfg['anti_mail_spam'] == 1){ + if(isset($cfg['anti_mail_spam']) && $cfg['anti_mail_spam'] == 1){ $cfg['anti_mail_spam'] = isset($cfg['anti_mail_spam1'][0]) ? $cfg['anti_mail_spam1'] : 0; } unset($cfg['anti_mail_spam11']); - if($cfg['deny_attribute'] == 1){ + if(isset($cfg['deny_attribute']) && $cfg['deny_attribute'] == 1){ $cfg['deny_attribute'] = isset($cfg['deny_attribute1'][0]) ? $cfg['deny_attribute1'] : 0; } unset($cfg['deny_attribute1']); - if($cfg['tidy'] == 2){ + if(isset($cfg['tidy']) && $cfg['tidy'] == 2){ $cfg['tidy'] = isset($cfg['tidy2'][0]) ? $cfg['tidy2'] : 0; } unset($cfg['tidy2']); - if($cfg['unique_ids'] == 2){ + if(isset($cfg['unique_ids']) && $cfg['unique_ids'] == 2){ $cfg['unique_ids'] = isset($cfg['unique_ids2'][0]) ? $cfg['unique_ids2'] : 1; } unset($cfg['unique_ids2']); @@ -540,9 +543,9 @@ if($do){ $cfg['show_setting'] = 'hlcfg'; $st = microtime(); - $out = htmLawed($_POST['text'], $cfg, str_replace(array('$', '{'), '', $_POST['spec'])); + $out = htmLawed($_POST['text'], $cfg, $_POST['spec']); $et = microtime(); - echo '<br /><a href="htmLawedTest.php" title="[toggle visibility] syntax-highlighted" onclick="javascript:toggle(\'inputR\'); return false;"><span class="notice">Input code »</span></a> <span class="help" title="tags estimated as half of total > and < chars; values may be inaccurate for non-ASCII text"><small><big>', strlen($_POST['text']), '</big> chars, ~<big>', round((substr_count($_POST['text'], '>') + substr_count($_POST['text'], '<'))/2), '</big> tags</small> </span><div id="inputR" style="display: none;">', format($_POST['text']), '</div><script type="text/javascript">hl(\'inputR\');</script>', (!isset($_POST['text'][$_hlimit]) ? ' <a href="htmLawedTest.php" title="[toggle visibility] hexdump; non-viewable characters like line-returns are shown as dots" onclick="javascript:toggle(\'inputD\'); return false;"><span class="notice">Input binary » </span></a><div id="inputD" style="display: none;">'. hexdump($_POST['text']). '</div>' : ''), ' <a href="htmLawedTest.php" title="[toggle visibility] finalized internal settings as interpreted by htmLawed; for developers" onclick="javascript:toggle(\'settingF\'); return false;"><span class="notice">Finalized internal settings » </span></a> <div id="settingF" style="display: none;">', str_replace(array(' ', "\t", ' '), array(' ', ' ', ' '), nl2br(htmlspecialchars(print_r($GLOBALS['hlcfg']['config'], true)))), '</div><script type="text/javascript">hl(\'settingF\');</script>', '<br /><a href="htmLawedTest.php" title="[toggle visibility] suitable for copy-paste" onclick="javascript:toggle(\'outputF\'); return false;"><span class="notice">Output »</span></a> <span class="help" title="approx., server-specific value excluding the \'include()\' call"><small>htmLawed processing time <big>', number_format(((substr($et,0,9)) + (substr($et,-10)) - (substr($st,0,9)) - (substr($st,-10))),4), '</big> s</small></span>', (($mem = memory_get_peak_usage()) !== false ? '<span class="help"><small>, peak memory usage <big>'. round(($mem-$pre_mem)/1048576, 2). '</big> <small>MB</small>' : ''), '</small></span><div id="outputF" style="display: block;"><div><textarea id="text2" class="textarea" name="text2" rows="5" cols="100" style="width: 100%;">', htmlspecialchars($out), '</textarea></div><button type="button" onclick="javascript:document.getElementById(\'text2\').focus();document.getElementById(\'text2\').select()" title="select all to copy" style="float:right;">Select all</button>'; + echo '<br /><a href="htmLawedTest.php" title="[toggle visibility] syntax-highlighted" onclick="javascript:toggle(\'inputR\'); return false;"><span class="notice">Input code »</span></a> <span class="help" title="tags estimated as half of total > and < chars; values may be inaccurate for non-ASCII text"><small><big>', strlen($_POST['text']), '</big> chars, ~<big>', ($tag = round((substr_count($_POST['text'], '>') + substr_count($_POST['text'], '<'))/2)), '</big> tag', ($tag > 1 ? 's' : ''), '</small> </span><div id="inputR" style="display: none;">', format($_POST['text']), '</div><script type="text/javascript">hl(\'inputR\');</script>', (!isset($_POST['text'][$_hlimit]) ? ' <a href="htmLawedTest.php" title="[toggle visibility] hexdump; non-viewable characters like line-returns are shown as dots" onclick="javascript:toggle(\'inputD\'); return false;"><span class="notice">Input binary » </span></a><div id="inputD" style="display: none;">'. hexdump($_POST['text']). '</div>' : ''), ' <a href="htmLawedTest.php" title="[toggle visibility] finalized internal settings as interpreted by htmLawed; for developers" onclick="javascript:toggle(\'settingF\'); return false;"><span class="notice">Finalized internal settings » </span></a> <div id="settingF" style="display: none;">', str_replace(array(' ', "\t", ' '), array(' ', ' ', ' '), nl2br(htmlspecialchars(print_r($GLOBALS['hlcfg']['config'], true)))), '</div><script type="text/javascript">hl(\'settingF\');</script>', '<br /><a href="htmLawedTest.php" title="[toggle visibility] suitable for copy-paste" onclick="javascript:toggle(\'outputF\'); return false;"><span class="notice">Output »</span></a> <span class="help" title="approx., server-specific value excluding the \'include()\' call"><small>htmLawed processing time <big>', number_format(((substr($et,0,9)) + (substr($et,-10)) - (substr($st,0,9)) - (substr($st,-10))),4), '</big> s</small></span>', (($mem = memory_get_peak_usage()) !== false ? '<span class="help"><small>, peak memory usage <big>'. round(($mem-$pre_mem)/1048576, 2). '</big> <small>MB</small>' : ''), '</small></span><div id="outputF" style="display: block;"><div><textarea id="text2" class="textarea" name="text2" rows="5" cols="100" style="width: 100%;">', htmlspecialchars($out), '</textarea></div><button type="button" onclick="javascript:document.getElementById(\'text2\').focus();document.getElementById(\'text2\').select()" title="select all to copy" style="float:right;">Select all</button>'; if($_w3c_validate && $validation) { ?> diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed_README.htm b/mod/htmlawed/vendors/htmLawed/htmLawed_README.htm index 7138ee9c0..6dd78fb2e 100644 --- a/mod/htmlawed/vendors/htmLawed/htmLawed_README.htm +++ b/mod/htmlawed/vendors/htmLawed/htmLawed_README.htm @@ -7,40 +7,74 @@ <meta name="keywords" content="htmLawed, HTM, HTML, HTML Tidy, converter, filter, formatter, purifier, sanitizer, XSS, input, PHP, software, code, script, security, cross-site scripting, hack, sanitize, remove, standards, tags, attributes, elements, htmLawed_README.txt, rTxt2htm, PHP Labware" /> <style type="text/css" media="all"> <!--/*--><![CDATA[/*><!--*/ -a {text-decoration:none; color: blue;}
-a:hover {color: red;}
-a:visited {color: blue;}
-body {margin: 0; padding: 0;}
-body, div, html, p {font-family: Georgia, 'Times new roman', Times;}
-code.code {font-family: 'Bitstream vera sans mono', 'Courier New', 'Courier', monospace;}
-div.comment {padding: 5px; color: #999999; font-size: 80%;}
-div.comment a {color: #6699cc;}
-div#body {width: 70%; margin: 5px; padding: 5px;} /* holds non-toc content */
-div#toc {position: fixed; top: 5px; left: 73%; z-index: 2; margin-top: 5px; margin-left: 5px; border: 1px solid gray; padding: 5px; background-color: #ededed; width: 23%; overflow: auto; max-height:94%; font-size: 90%;} /* holds content table (toc) */
-div#top {font-size: 14px; margin: 5px; padding: 5px;} /* holds all content */
-div.monospace {overflow: auto; font-family: 'Bitstream vera sans mono', 'Courier New', 'Courier', monospace;}
-div.sub-section {padding-left: 15px;}
-div.sub-sub-section {padding-left: 30px;}
-h1 {font-size: 22px; margin-top: 5px; margin-bottom: 5px;}
-h2 {font-size: 20px; float: left; margin-top: 15px; margin-bottom: 5px;}
-h3 {font-size: 18px; float: left; margin-top: 15px; margin-bottom: 5px;}
-h4 {font-size: 16px; float: left; margin-top: 15px; margin-bottom: 5px;}
-hr {margin-top: 15px; margin-bottom: 5px;}
-input, textarea {font-family: 'Bitstream vera sans mono', 'Courier New', 'Courier', monospace;}
-p.subtle {color: gray; padding: 0; padding-top: 10px; margin: 0;}
-p.subtle a, p.subtle a:visited {color: #6699cc;}
-span.item-no {color: black;}
-span.subtle {color: gray; margin: 0; padding:0;}
-span.subtle a, span.subtle a:visited {color: #6699cc;}
-span.term {font-family: 'Bitstream vera sans mono', 'Courier New', 'Courier', monospace;}
-span.toc-item {color: black;}
-span.totop {float: right; margin-top: 15px; margin-bottom: 5px;}
-span.totop a, span.totop a:visited {color: #6699cc;}
-@media screen { /* fixes for old IE */
- * html, * html body {overflow-y: auto!important; height: 100%; margin: 0; padding: 0;}
- * html div#body {height: 100%; overflow-y: auto; position: relative;}
- * html div#toc {position: absolute;}
-}
+a {text-decoration:none; color: blue;} + +a:hover {color: red;} + +a:visited {color: blue;} + +body {margin: 0; padding: 0;} + +body, div, html, p {font-family: Georgia, 'Times new roman', Times;} + +code.code {font-family: 'Bitstream vera sans mono', 'Courier New', 'Courier', monospace;} + +div.comment {padding: 5px; color: #999999; font-size: 80%;} + +div.comment a {color: #6699cc;} + +div#body {width: 70%; margin: 5px; padding: 5px;} /* holds non-toc content */ + +div#toc {position: fixed; top: 5px; left: 73%; z-index: 2; margin-top: 5px; margin-left: 5px; border: 1px solid gray; padding: 5px; background-color: #ededed; width: 23%; overflow: auto; max-height:94%; font-size: 90%;} /* holds content table (toc) */ + +div#top {font-size: 14px; margin: 5px; padding: 5px;} /* holds all content */ + +div.monospace {overflow: auto; font-family: 'Bitstream vera sans mono', 'Courier New', 'Courier', monospace;} + +div.sub-section {padding-left: 15px;} + +div.sub-sub-section {padding-left: 30px;} + +h1 {font-size: 22px; margin-top: 5px; margin-bottom: 5px;} + +h2 {font-size: 20px; float: left; margin-top: 15px; margin-bottom: 5px;} + +h3 {font-size: 18px; float: left; margin-top: 15px; margin-bottom: 5px;} + +h4 {font-size: 16px; float: left; margin-top: 15px; margin-bottom: 5px;} + +hr {margin-top: 15px; margin-bottom: 5px;} + +input, textarea {font-family: 'Bitstream vera sans mono', 'Courier New', 'Courier', monospace;} + +p.subtle {color: gray; padding: 0; padding-top: 10px; margin: 0;} + +p.subtle a, p.subtle a:visited {color: #6699cc;} + +span.item-no {color: black;} + +span.subtle {color: gray; margin: 0; padding:0;} + +span.subtle a, span.subtle a:visited {color: #6699cc;} + +span.term {font-family: 'Bitstream vera sans mono', 'Courier New', 'Courier', monospace;} + +span.toc-item {color: black;} + +span.totop {float: right; margin-top: 15px; margin-bottom: 5px;} + +span.totop a, span.totop a:visited {color: #6699cc;} + +@media screen { /* fixes for old IE */ + + * html, * html body {overflow-y: auto!important; height: 100%; margin: 0; padding: 0;} + + * html div#body {height: 100%; overflow-y: auto; position: relative;} + + * html div#toc {position: absolute;} + +} + /*]]>*/--> </style> <title>htmLawed documentation | htmLawed PHP software is a free, open-source, customizable HTML input purifier and filter</title> @@ -110,10 +144,10 @@ span.totop a, span.totop a:visited {color: #6699cc;} <div id="body"> <br /> -<div class="comment">htmLawed_README.txt, 22 December 2009<br /> -htmLawed 1.1.9, 22 December 2009<br /> +<div class="comment">htmLawed_README.txt, 8 June 2012<br /> +htmLawed 1.1.11, 5 June 2012<br /> Copyright Santosh Patnaik<br /> -GPL v3 license<br /> +Dual licensed with LGPL 3 and GPL 2 or later<br /> A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed">http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed</a> </div> <br /> @@ -222,7 +256,7 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl <a name="s1.4" id="s1.4"></a><span class="item-no">1.4</span>  License & copyright </h3><span class="totop"><a href="#peak">(to top)</a></span><br style="clear: both;" /> <br /> -  htmLawed is free and open-source software licensed under GPL license version <a href="http://www.gnu.org/licenses/gpl-3.0.txt">3</a>, and copyrighted by Santosh Patnaik, MD, PhD.<br /> +  htmLawed is free and open-source software dual licensed under LGPL license version <a href="http://www.gnu.org/licenses/lgpl-3.0.txt">3</a> and GPL license version <a href="http://www.gnu.org/licenses/gpl-2.0.txt">2</a> or later, and copyrighted by Santosh Patnaik, MD, PhD.<br /> </div> <div class="sub-section"><h3> @@ -254,9 +288,11 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl <a name="s2" id="s2"></a><span class="item-no">2</span>  Usage </h2><span class="totop"><a href="#peak">(to top)</a></span><br style="clear: both;" /> <br /> -  htmLawed should work with PHP 4.3 and higher. Either <span class="term">include()</span> the <span class="term">htmLawed.php</span> file or copy-paste the entire code.<br /> +  htmLawed should work with PHP 4.4 and higher. Either <span class="term">include()</span> the <span class="term">htmLawed.php</span> file or copy-paste the entire code.<br /> <br />   To easily <strong>test</strong> htmLawed using a form-based interface, use the provided <a href="htmLawedTest.php">demo</a> (<span class="term">htmLawed.php</span> and <span class="term">htmLawedTest.php</span> should be in the same directory on the web-server).<br /> +<br /> +  <strong>Note</strong>: For code for usage of the htmLawed class (for htmLawed in OOP), please refer to the <a href="http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed">htmLawed</a> website; the filtering itself can be configured, etc., as described here.<br /> <div class="sub-section"><h3> <a name="s2.1" id="s2.1"></a><span class="item-no">2.1</span>  Simple @@ -371,6 +407,12 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl   <span class="term">string</span> - dictated by values in <span class="term">string</span><br />   <span class="term">on*</span> (like <span class="term">onfocus</span>) attributes not allowed - "<br /> <br /> +  <strong>direct_nest_list</strong><br /> +  Allow direct nesting of a list within another without requiring it to be a list item; see <a href="#s3.3.4">section 3.3.4</a><br /> +<br /> +  <span class="term">0</span> - no  *<br /> +  <span class="term">1</span> - yes<br /> +<br />   <strong>elements</strong><br />   Allowed HTML elements; see <a href="#s3.3">section 3.3</a><br /> <br /> @@ -441,11 +483,11 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl   <span class="term">1</span> - will auto-adjust other relevant <span class="term">$config</span> parameters (indicated by <span class="term">"</span> in this list)<br /> <br />   <strong>schemes</strong><br /> -  Array of attribute-specific, comma-separated, lower-cased list of schemes (protocols) allowed in attributes accepting URLs; <span class="term">*</span> covers all unspecified attributes; see <a href="#s3.4.3">section 3.4.3</a><br /> +  Array of attribute-specific, comma-separated, lower-cased list of schemes (protocols) allowed in attributes accepting URLs (or <span class="term">!</span> to <em>deny</em> any URL); <span class="term">*</span> covers all unspecified attributes; see <a href="#s3.4.3">section 3.4.3</a><br /> <br />   <span class="term">href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https</span>  *<br />   <span class="term">*: ftp, gopher, http, https, mailto, news, nntp, telnet</span>  ^<br /> -  <span class="term">href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; style: nil; *:file, http, https</span>  "<br /> +  <span class="term">href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; style: !; *:file, http, https</span>  "<br /> <br />   <strong>show_setting</strong><br />   Name of a PHP variable to assign the <em>finalized</em> <span class="term">$config</span> and <span class="term">$spec</span> values; see <a href="#s3.8">section 3.8</a><br /> @@ -541,7 +583,7 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl   <em>Rule</em>: <span class="term">input=title(), value(maxval=8/default=6)</span><br />   <em>Output</em>: <span class="term"><input title="WIDTH" value="6" /><input title="length" value="5" /></span><br /> <br /> -  <em>Rule</em>: <span class="term">input=title(nomatch=$w.d$i), value(match=$em$/default=6em)</span><br /> +  <em>Rule</em>: <span class="term">input=title(nomatch=%w.d%i), value(match=%em%/default=6em)</span><br />   <em>Output</em>: <span class="term"><input value="10em" /><input title="length" value="6em" /></span><br /> <br />   <em>Rule</em>: <span class="term">input=title(oneof=height|depth/default=depth), value(noneof=5|6)</span><br /> @@ -565,9 +607,9 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl <a name="s2.5" id="s2.5"></a><span class="item-no">2.5</span>  Some security risks to keep in mind </h3><span class="totop"><a href="#peak">(to top)</a></span><br style="clear: both;" /> <br /> -  When setting the parameters/arguments (like those to allow certain HTML elements) for use with htmLawed, one should bear in mind that the setting may let through potentially <em>dangerous</em> HTML code. (This may not be a problem if the authors are trusted.)<br /> +  When setting the parameters/arguments (like those to allow certain HTML elements) for use with htmLawed, one should bear in mind that the setting may let through potentially <em>dangerous</em> HTML code which is meant to steal user-data, deface a website, render a page non-functional, etc.<br /> <br /> -  For example, following increase security risks:<br /> +  Unless end-users, either people or software, supplying the content are completely trusted, security issues arising from the degree of HTML usage permission has to be kept in mind. For example, following increase security risks:<br /> <br />   *  Allowing <span class="term">script</span>, <span class="term">applet</span>, <span class="term">embed</span>, <span class="term">iframe</span> or <span class="term">object</span> elements, or certain of their attributes like <span class="term">allowscriptaccess</span><br /> <br /> @@ -575,7 +617,13 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl <br />   *  Allowing dynamic CSS expressions (a feature of the IE browser)<br /> <br /> -  <em>Unsafe</em> HTML can be removed by setting <span class="term">$config</span> appropriately. E.g., <span class="term">$config["elements"] = "* -script"</span> (<a href="#s3.3">section 3.3</a>), <span class="term">$config["safe"] = 1</span> (<a href="#s3.6">section 3.6</a>), etc.<br /> +  *  Allowing the <span class="term">style</span> attribute<br /> +<br /> +  To remove <em>unsecure</em> HTML, code-developers using htmLawed must set <span class="term">$config</span> appropriately. E.g., <span class="term">$config["elements"] = "* -script"</span> to deny the <span class="term">script</span> element (<a href="#s3.3">section 3.3</a>), <span class="term">$config["safe"] = 1</span> to auto-configure ceratin htmLawed parameters for maximizing security (<a href="#s3.6">section 3.6</a>), etc.<br /> +<br /> +  Permitting the <span class="term">*style*</span> attribute brings in risks of <em>click-jacking</em>, <em>phishing</em>, web-page overlays, etc., <em>even</em> when the <span class="term">safe</span> parameter is enabled (see <a href="#s3.6">section 3.6</a>). Except for URLs and a few other things like CSS dynamic expressions, htmLawed currently does not check every CSS style property. It does provide ways for the code-developer implementing htmLawed to do such checks through htmLawed's <span class="term">$spec</span> argument, and through the <span class="term">hook_tag</span> parameter (see <a href="#s3.4.8">section 3.4.8</a> for more). Disallowing <span class="term">style</span> completely and relying on CSS classes and stylesheet files is recommended.<br /> +<br /> +  htmLawed does not check or correct the character <strong>encoding</strong> of the input it receives. In conjunction with permitting circumstances such as when the character encoding is left undefined through HTTP headers or HTML <span class="term">meta</span> tags, this can permit an exploit (like Google's UTF-7/XSS vulnerability of the past).<br /> </div> <div class="sub-section"><h3> @@ -722,6 +770,8 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl <br />   *  Because of poor Unicode support in PHP, htmLawed does not remove the <em>high value</em> HTML-invalid characters with multi-byte code-points. Such characters however are extremely unlikely to be in the input. (see <a href="#s3.1">section 3.1</a>).<br /> <br /> +  *  htmLawed does not check or correct the character encoding of the input it receives. In conjunction with permitting circumstances such as when the character encoding is left undefined through HTTP headers or HTML <span class="term">meta</span> tags, this can permit an exploit (like Google's UTF-7/XSS vulnerability of the past).<br /> +<br />   *  Like any script using PHP's PCRE regex functions, PHP setup-specific low PCRE limit values can cause htmLawed to at least partially fail with very long input texts.<br /> </div> @@ -1162,6 +1212,8 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl   In some cases, the specs stipulate the number and/or the ordering of the child elements. A <span class="term">table</span> can have 0 or 1 <span class="term">caption</span>, <span class="term">tbody</span>, <span class="term">tfoot</span>, and <span class="term">thead</span>, but they must be in this order: <span class="term">caption</span>, <span class="term">thead</span>, <span class="term">tfoot</span>, <span class="term">tbody</span>.<br /> <br />   htmLawed currently does not check for conformance to these rules. Note that any non-compliance in this regard will not introduce security vulnerabilities, crash browser applications, or affect the rendering of web-pages.<br /> +<br /> +  With <span class="term">$config["direct_list_nest"]</span> set to <span class="term">1</span>, htmLawed will allow direct nesting of an <span class="term">ol</span> or <span class="term">ul</span> list within another <span class="term">ol</span> or <span class="term">ul</span> without requiring the child list to be within an <span class="term">li</span> of the parent list. While this is not standard-compliant, directly nested lists are rendered properly by almost all browsers. The parameter <span class="term">$config["direct_list_nest"]</span> has no effect if tag-balancing (<a href="#s3.3.3">section 3.3.3</a>) is turned off.<br /> </div> <div class="sub-section"><h3> @@ -1271,6 +1323,8 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl <br />   As a side-note, one may find <span class="term">style: *</span> useful as URLs in <span class="term">style</span> attributes can be specified in a variety of ways, and the patterns that htmLawed uses to identify URLs may mistakenly identify non-URL text.<br /> <br /> +  <span class="term">!</span> can be put in the list of schemes to disallow all protocols as well as <em>local</em> URLs. Thus, with <span class="term">href: http, style: !</span>, '<a href="http://cnn.com" style="background-image: url('local.jpg');">CNN</a>' will become '<a href="http://cnn.com" style="background-image: url('denied:local.jpg');">CNN</a>'.<br /> +<br />   <strong>Note</strong>: If URL-accepting attributes other than those listed above are being allowed, then the scheme will not be checked unless the attribute name contains the string <span class="term">src</span> (e.g., <span class="term">dynsrc</span>) or starts with <span class="term">o</span> (e.g., <span class="term">onbeforecopy</span>).<br /> <br />   With <span class="term">$config["safe"] = 1</span>, all URLs are disallowed in the <span class="term">style</span> attribute values.<br /> @@ -1488,7 +1542,7 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl <a name="s3.4.8" id="s3.4.8"></a><span class="item-no">3.4.8</span>  Inline style properties </h3><span class="totop"><a href="#peak">(to top)</a></span><br style="clear: both;" /> <br /> -  htmLawed can check URL schemes and dynamic expressions (to guard against Javascript, etc., script-based insecurities) in inline CSS style property values in the <span class="term">style</span> attributes. (CSS properties like <span class="term">background-image</span> that accept URLs in their values are noted in <a href="#s5.3">section 5.3</a>.) Dynamic CSS expressions that allow scripting in the IE browser, and can be a vulnerability, can be removed from property values by setting <span class="term">$config["css_expression"]</span> to <span class="term">1</span> (default setting).<br /> +  htmLawed can check URL schemes and dynamic expressions (to guard against Javascript, etc., script-based insecurities) in inline CSS style property values in the <span class="term">style</span> attributes. (CSS properties like <span class="term">background-image</span> that accept URLs in their values are noted in <a href="#s5.3">section 5.3</a>.) Dynamic CSS expressions that allow scripting in the IE browser, and can be a vulnerability, can be removed from property values by setting <span class="term">$config["css_expression"]</span> to <span class="term">1</span> (default setting). Note that when <span class="term">$config["css_expression"]</span> is set to <span class="term">1</span>, htmLawed will remove <span class="term">/*</span> from the <span class="term">style</span> values.<br /> <br />   <strong>Note</strong>: Because of the various ways of representing characters in attribute values (URL-escapement, entitification, etc.), htmLawed might alter the values of the <span class="term">style</span> attribute values, and may even falsely identify dynamic CSS expressions and URL schemes in them. If this is an important issue, checking of URLs and dynamic expressions can be turned off (<span class="term">$config["schemes"] = "...style:*..."</span>, see <a href="#s3.4.3">section 3.4.3</a>, and <span class="term">$config["css_expression"] = 0</span>). Alternately, admins can use their own custom function for finer handling of <span class="term">style</span> values through the <span class="term">hook_tag</span> parameter (see <a href="#s3.4.9">section 3.4.9</a>).<br /> <br /> @@ -1503,14 +1557,30 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl <br />   It is possible to utilize a custom hook function to alter the tag content htmLawed has finalized (i.e., after it has checked/corrected for required attributes, transformed attributes, lower-cased attribute names, etc.).<br /> <br /> -  When <span class="term">$config</span> parameter <span class="term">hook_tag</span> is set to the name of a function, htmLawed (function <span class="term">hl_tag()</span>) will pass on the element name, and the <em>finalized</em> attribute name-value pairs as array elements to the function. The function is expected to return the full opening tag string like <span class="term"><element_name attribute_1_name="attribute_1_value"...></span> (for empty elements like <span class="term">img</span> and <span class="term">input</span>, the element-closing slash <span class="term">/</span> should also be included).<br /> +  When <span class="term">$config</span> parameter <span class="term">hook_tag</span> is set to the name of a function, htmLawed (function <span class="term">hl_tag()</span>) will pass on the element name, and, in the case of an opening tag, the <em>finalized</em> attribute name-value pairs as array elements to the function. The function, after completing a task such as filtering or tag transformation, will typically return an empty string, the full opening tag string like <span class="term"><element_name attribute_1_name="attribute_1_value"...></span> (for empty elements like <span class="term">img</span> and <span class="term">input</span>, the element-closing slash <span class="term">/</span> should also be included), etc.<br /> +<br /> +  Any <span class="term">hook_tag</span> function, since htmLawed version 1.1.11, also receives names of elements in closing tags, such as <span class="term">a</span> in the closing <span class="term"></a></span> tag of the element <span class="term"><a href="http://cnn.com">CNN</a></span>. Unlike for opening tags, no other value (i.e., the attribute name-value array) is passed to the function since a closing tag contains only element names. Typically, the function will return an empty string or a full closing tag (like <span class="term"></a></span>).<br /> <br />   This is a <strong>powerful functionality</strong> that can be exploited for various objectives: consolidate-and-convert inline <span class="term">style</span> attributes to <span class="term">class</span>, convert <span class="term">embed</span> elements to <span class="term">object</span>, permit only one <span class="term">caption</span> element in a <span class="term">table</span> element, disallow embedding of certain types of media, <strong>inject HTML</strong>, use <a href="http://csstidy.sourceforge.net">CSSTidy</a> to sanitize <span class="term">style</span> attribute values, etc.<br /> <br />   As an example, the custom hook code below can be used to force a series of specifically ordered <span class="term">id</span> attributes on all elements, and a specific <span class="term">param</span> element inside all <span class="term">object</span> elements:<br /> <br /> -<code class="code">    function my_tag_function($element, $attribute_array){</code> +<code class="code">    function my_tag_function($element, $attribute_array=0){</code> +<br /> +<br /> + +<code class="code">      // If second argument is not received, it means a closing tag is being handled</code> +<br /> + +<code class="code">      if(is_numeric($attribute_array)){</code> +<br /> + +<code class="code">        return "</$element>";</code> +<br /> + +<code class="code">      }</code> +<br /> <br /> <code class="code">      static $id = 0;</code> @@ -1570,6 +1640,11 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl <code class="code">      }</code> <br /> +<br /> + +<code class="code">      static $empty_elements = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1);</code> +<br /> +<br /> <code class="code">      return "<{$element}{$string}". (isset($in_array($element, $empty_elements) ? ' /' : ''). '>'. $new_element;</code> <br /> @@ -1598,7 +1673,7 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl <br />   htmLawed allows an admin to use <span class="term">$config["safe"]</span> to auto-adjust multiple <span class="term">$config</span> parameters (such as <span class="term">elements</span> which declares the allowed element-set), which otherwise would have to be manually set. The relevant parameters are indicated by <span class="term">"</span> in <a href="#s2.2">section 2.2</a>). Thus, one can pass the <span class="term">$config</span> argument with a simpler value.<br /> <br /> -  With the value of <span class="term">1</span>, htmLawed considers <span class="term">CDATA</span> sections and HTML comments as plain text, and prohibits the <span class="term">applet</span>, <span class="term">embed</span>, <span class="term">iframe</span>, <span class="term">object</span> and <span class="term">script</span> elements, and the <span class="term">on*</span> attributes like <span class="term">onclick</span>. ( There are <span class="term">$config</span> parameters like <span class="term">css_expression</span> that are not affected by the value set for <span class="term">safe</span> but whose default values still contribute towards a more <em>safe</em> output.) Further, URLs with schemes (see <a href="#s3.4.3">section 3.4.3</a>) are neutralized so that, e.g., <span class="term">style="moz-binding:url(http://danger)"</span> becomes <span class="term">style="moz-binding:url(denied:http://danger)"</span> while <span class="term">style="moz-binding:url(ok)"</span> remains intact.<br /> +  With the value of <span class="term">1</span>, htmLawed considers <span class="term">CDATA</span> sections and HTML comments as plain text, and prohibits the <span class="term">applet</span>, <span class="term">embed</span>, <span class="term">iframe</span>, <span class="term">object</span> and <span class="term">script</span> elements, and the <span class="term">on*</span> attributes like <span class="term">onclick</span>. ( There are <span class="term">$config</span> parameters like <span class="term">css_expression</span> that are not affected by the value set for <span class="term">safe</span> but whose default values still contribute towards a more <em>safe</em> output.) Further, URLs with schemes (see <a href="#s3.4.3">section 3.4.3</a>) are neutralized so that, e.g., <span class="term">style="moz-binding:url(http://danger)"</span> becomes <span class="term">style="moz-binding:url(denied:http://danger)"</span>.<br /> <br />   Admins, however, may still want to completely deny the <span class="term">style</span> attribute, e.g., with code like<br /> <br /> @@ -1606,6 +1681,8 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl <code class="code">    $processed = htmLawed($text, array('safe'=>1, 'deny_attribute'=>'style'));</code> <br /> <br /> +  Permitting the <span class="term">style</span> attribute brings in risks of <em>click-jacking</em>, etc. CSS property values can render a page non-functional or be used to deface it. Except for URLs, dynamic expressions, and some other things, htmLawed does not completely check <span class="term">style</span> values. It does provide ways for the code-developer implementing htmLawed to do such checks through the <span class="term">$spec</span> argument, and through the <span class="term">hook_tag</span> parameter (see <a href="#s3.4.8">section 3.4.8</a> for more). Disallowing style completely and relying on CSS classes and stylesheet files is recommended.<br /> +<br />   If a value for a parameter auto-set through <span class="term">safe</span> is still manually provided, then that value can over-ride the auto-set value. E.g., with <span class="term">$config["safe"] = 1</span> and <span class="term">$config["elements"] = "*+script"</span>, <span class="term">script</span>, but not <span class="term">applet</span>, is allowed.<br /> <br />   A page illustrating the efficacy of htmLawed's anti-XSS abilities with <span class="term">safe</span> set to <span class="term">1</span> against XSS vectors listed by <a href="http://ha.ckers.org/xss.html">RSnake</a> may be available <a href="http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/rsnake/RSnakeXSSTest.htm">here</a>.<br /> @@ -1688,6 +1765,20 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl <br />   <em>Version number - Release date. Notes</em><br /> <br /> +  1.1.11 - 5 June 2012. Fix for possible problem with handling of multi-byte characters in attribute values in an mbstring.func_overload enviroment. <span class="term">$config["hook_tag"]</span>, if specified, now receives names of elements in closing tags.<br /> +<br /> +  1.1.10 - 22 October 2011. Fix for a bug in the <span class="term">tidy</span> functionality that caused the entire input to be replaced with a single space; new parameter, <span class="term">$config["direct_list_nest"]</span> to allow direct descendance of a list in a list. (5 April 2012. Dual licensing from LGPLv3 to LGPLv3 and GPLv2+.)<br /> +<br /> +  1.1.9.5 - 6 July 2011. Minor correction of a rule for nesting of <span class="term">li</span> within <span class="term">dir</span><br /> +<br /> +  1.1.9.4 - 3 July 2010. Parameter <span class="term">schemes</span> now accepts <span class="term">!</span> so any URL, even a local one, can be <em>denied</em>. An issue in which a second URL value in <span class="term">style</span> properties was not checked was fixed.<br /> +<br /> +  1.1.9.3 - 17 May 2010. Checks for correct nesting of <span class="term">param</span><br /> +<br /> +  1.1.9.2 - 26 April 2010. Minor fix regarding rendering of denied URL schemes<br /> +<br /> +  1.1.9.1 - 26 February 2010. htmLawed now uses the LGPL version 3 license; support for <span class="term">flashvars</span> attribute for <span class="term">embed</span><br /> +<br />   1.1.9 - 22 December 2009. Soft-hyphens are now removed only from URL-accepting attribute values<br /> <br />   1.1.8.1 - 16 July 2009. Minor code-change to fix a PHP error notice<br /> @@ -1738,6 +1829,10 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl <br />   Upgrading is as simple as replacing the previous version of <span class="term">htmLawed.php</span> (assuming it was not modified for customized features). As htmLawed output is almost always used in static documents, upgrading should not affect old, finalized content.<br /> <br /> +  <strong>Important</strong>  The following upgrades may affect the functionality of a specific htmLawed as indicated by their corresponding notes:<br /> +<br /> +  (1) From version 1.1-1.1.10 to 1.1.11, if a <span class="term">hook_tag</span> function is in use: In version 1.1.11, elements in closing tags (and not just the opening tags) are also passed to the function. There are no attribute names/values to pass, so a <span class="term">hook_tag</span> function receives only the element name. The <span class="term">hook_tag</span> function therefore may have to be edited. See <a href="#s3.4.9">section 3.4.9</a>.<br /> +<br />   Old versions of htmLawed may be available online. E.g., for version 1.0, check <a href="http://www.bioinformatics.org/phplabware/downloads/htmLawed1.zip">http://www.bioinformatics.org/phplabware/downloads/htmLawed1.zip</a>, for 1.1.1, htmLawed111.zip, and for 1.1.10, htmLawed1110.zip.<br /> </div> @@ -1789,7 +1884,7 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl <a name="s4.10" id="s4.10"></a><span class="item-no">4.10</span>  Acknowledgements </h3><span class="totop"><a href="#peak">(to top)</a></span><br style="clear: both;" /> <br /> -  Bryan Blakey, Ulf Harnhammer, Gareth Heyes, Lukasz Pilorz, Shelley Powers, Edward Yang, and many anonymous users.<br /> +  Nicholas Alipaz, Bryan Blakey, Pádraic Brady, Ulf Harnhammer, Gareth Heyes, Klaus Leithoff, Lukasz Pilorz, Shelley Powers, Edward Yang, and many anonymous users.<br /> <br />   Thank you!<br /> @@ -1856,6 +1951,7 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl   disabled - button, input, optgroup, option, select, textarea<br />   enctype - form<br />   face - font<br /> +  flashvars* - embed<br />   for - label<br />   frame - table<br />   frameborder - iframe<br /> @@ -2057,7 +2153,7 @@ A PHP Labware internal utility - <a href="http://www.bioinformatics.org/phpl </div> </div> <br /> -<hr /><br /><br /><span class="subtle"><small>HTM version of <em><a href="htmLawed_README.txt">htmLawed_README.txt</a></em> generated on 22 Dec, 2009 using <a href="http://www.bioinformatics.org/phplabware/internal_utilities">rTxt2htm</a> from PHP Labware</small></span> +<hr /><br /><br /><span class="subtle"><small>HTM version of <em><a href="htmLawed_README.txt">htmLawed_README.txt</a></em> generated on 06 Jun, 2012 using <a href="http://www.bioinformatics.org/phplabware/internal_utilities">rTxt2htm</a> from PHP Labware</small></span> </div><!-- ended div body --> </div><!-- ended div top --> </body> diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed_README.txt b/mod/htmlawed/vendors/htmLawed/htmLawed_README.txt index 48a67009b..e4027e465 100644..100755 --- a/mod/htmlawed/vendors/htmLawed/htmLawed_README.txt +++ b/mod/htmlawed/vendors/htmLawed/htmLawed_README.txt @@ -1,8 +1,8 @@ /* -htmLawed_README.txt, 22 December 2009 -htmLawed 1.1.9, 22 December 2009 +htmLawed_README.txt, 8 June 2012 +htmLawed 1.1.11, 5 June 2012 Copyright Santosh Patnaik -GPL v3 license +Dual licensed with LGPL 3 and GPL 2 or later A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed */ @@ -171,7 +171,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern -- 1.4 License & copyright ----------------------------------------o - htmLawed is free and open-source software licensed under GPL license version 3:- http://www.gnu.org/licenses/gpl-3.0.txt, and copyrighted by Santosh Patnaik, MD, PhD. + htmLawed is free and open-source software dual licensed under LGPL license version 3:- http://www.gnu.org/licenses/lgpl-3.0.txt and GPL license version 2:- http://www.gnu.org/licenses/gpl-2.0.txt or later, and copyrighted by Santosh Patnaik, MD, PhD. -- 1.5 Terms used here --------------------------------------------o @@ -200,9 +200,11 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern == 2 Usage ========================================================oo - htmLawed should work with PHP 4.3 and higher. Either 'include()' the 'htmLawed.php' file or copy-paste the entire code. + htmLawed should work with PHP 4.4 and higher. Either 'include()' the 'htmLawed.php' file or copy-paste the entire code. To easily *test* htmLawed using a form-based interface, use the provided demo:- htmLawedTest.php ('htmLawed.php' and 'htmLawedTest.php' should be in the same directory on the web-server). + + *Note*: For code for usage of the htmLawed class (for htmLawed in OOP), please refer to the htmLawed:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed website; the filtering itself can be configured, etc., as described here. -- 2.1 Simple ------------------------------------------------------ @@ -305,6 +307,12 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern '0' - none * 'string' - dictated by values in 'string' 'on*' (like 'onfocus') attributes not allowed - " + + *direct_nest_list* + Allow direct nesting of a list within another without requiring it to be a list item; see section:- #3.3.4 + + '0' - no * + '1' - yes *elements* Allowed HTML elements; see section:- #3.3 @@ -376,11 +384,11 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern '1' - will auto-adjust other relevant '$config' parameters (indicated by '"' in this list) *schemes* - Array of attribute-specific, comma-separated, lower-cased list of schemes (protocols) allowed in attributes accepting URLs; '*' covers all unspecified attributes; see section:- #3.4.3 + Array of attribute-specific, comma-separated, lower-cased list of schemes (protocols) allowed in attributes accepting URLs (or '!' to `deny` any URL); '*' covers all unspecified attributes; see section:- #3.4.3 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https' * '*: ftp, gopher, http, https, mailto, news, nntp, telnet' ^ - 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; style: nil; *:file, http, https' " + 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; style: !; *:file, http, https' " *show_setting* Name of a PHP variable to assign the `finalized` '$config' and '$spec' values; see section:- #3.8 @@ -469,7 +477,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern `Rule`: 'input=title(), value(maxval=8/default=6)' `Output`: '<input title="WIDTH" value="6" /><input title="length" value="5" />' - `Rule`: 'input=title(nomatch=$w.d$i), value(match=$em$/default=6em)' + `Rule`: 'input=title(nomatch=%w.d%i), value(match=%em%/default=6em)' `Output`: '<input value="10em" /><input title="length" value="6em" />' `Rule`: 'input=title(oneof=height|depth/default=depth), value(noneof=5|6)' @@ -491,17 +499,23 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern -- 2.5 Some security risks to keep in mind ------------------------o - When setting the parameters/arguments (like those to allow certain HTML elements) for use with htmLawed, one should bear in mind that the setting may let through potentially `dangerous` HTML code. (This may not be a problem if the authors are trusted.) + When setting the parameters/arguments (like those to allow certain HTML elements) for use with htmLawed, one should bear in mind that the setting may let through potentially `dangerous` HTML code which is meant to steal user-data, deface a website, render a page non-functional, etc. - For example, following increase security risks: + Unless end-users, either people or software, supplying the content are completely trusted, security issues arising from the degree of HTML usage permission has to be kept in mind. For example, following increase security risks: * Allowing 'script', 'applet', 'embed', 'iframe' or 'object' elements, or certain of their attributes like 'allowscriptaccess' * Allowing HTML comments (some Internet Explorer versions are vulnerable with, e.g., '<!--[if gte IE 4]><script>alert("xss");</script><![endif]-->' * Allowing dynamic CSS expressions (a feature of the IE browser) + + * Allowing the 'style' attribute - `Unsafe` HTML can be removed by setting '$config' appropriately. E.g., '$config["elements"] = "* -script"' (section:- #3.3), '$config["safe"] = 1' (section:- #3.6), etc. + To remove `unsecure` HTML, code-developers using htmLawed must set '$config' appropriately. E.g., '$config["elements"] = "* -script"' to deny the 'script' element (section:- #3.3), '$config["safe"] = 1' to auto-configure ceratin htmLawed parameters for maximizing security (section:- #3.6), etc. + + Permitting the '*style*' attribute brings in risks of `click-jacking`, `phishing`, web-page overlays, etc., `even` when the 'safe' parameter is enabled (see section:- #3.6). Except for URLs and a few other things like CSS dynamic expressions, htmLawed currently does not check every CSS style property. It does provide ways for the code-developer implementing htmLawed to do such checks through htmLawed's '$spec' argument, and through the 'hook_tag' parameter (see section:- #3.4.8 for more). Disallowing 'style' completely and relying on CSS classes and stylesheet files is recommended. + + htmLawed does not check or correct the character *encoding* of the input it receives. In conjunction with permitting circumstances such as when the character encoding is left undefined through HTTP headers or HTML 'meta' tags, this can permit an exploit (like Google's UTF-7/XSS vulnerability of the past). -- 2.6 Use without modifying old 'kses()' code --------------------o @@ -614,6 +628,8 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern * htmLawed does not correct certain possible attribute-based security vulnerabilities (e.g., '<a href="http://x%22+style=%22background-image:xss">x</a>'). These arise when browsers mis-identify markup in `escaped` text, defeating the very purpose of escaping text (a bad browser will read the given example as '<a href="http://x" style="background-image:xss">x</a>'). * Because of poor Unicode support in PHP, htmLawed does not remove the `high value` HTML-invalid characters with multi-byte code-points. Such characters however are extremely unlikely to be in the input. (see section:- #3.1). + + * htmLawed does not check or correct the character encoding of the input it receives. In conjunction with permitting circumstances such as when the character encoding is left undefined through HTTP headers or HTML 'meta' tags, this can permit an exploit (like Google's UTF-7/XSS vulnerability of the past). * Like any script using PHP's PCRE regex functions, PHP setup-specific low PCRE limit values can cause htmLawed to at least partially fail with very long input texts. @@ -925,6 +941,8 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern In some cases, the specs stipulate the number and/or the ordering of the child elements. A 'table' can have 0 or 1 'caption', 'tbody', 'tfoot', and 'thead', but they must be in this order: 'caption', 'thead', 'tfoot', 'tbody'. htmLawed currently does not check for conformance to these rules. Note that any non-compliance in this regard will not introduce security vulnerabilities, crash browser applications, or affect the rendering of web-pages. + + With '$config["direct_list_nest"]' set to '1', htmLawed will allow direct nesting of an 'ol' or 'ul' list within another 'ol' or 'ul' without requiring the child list to be within an 'li' of the parent list. While this is not standard-compliant, directly nested lists are rendered properly by almost all browsers. The parameter '$config["direct_list_nest"]' has no effect if tag-balancing (section:- #3.3.3) is turned off. -- 3.3.5 Beautify or compact HTML ---------------------------------o @@ -1020,6 +1038,8 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern Thus, `to allow Javascript`, one can set '$config["schemes"]' as 'href: mailto, http, https; *: http, https, javascript', or 'href: mailto, http, https, javascript; *: http, https, javascript', or '*: *', and so on. As a side-note, one may find 'style: *' useful as URLs in 'style' attributes can be specified in a variety of ways, and the patterns that htmLawed uses to identify URLs may mistakenly identify non-URL text. + + '!' can be put in the list of schemes to disallow all protocols as well as `local` URLs. Thus, with 'href: http, style: !', '<a href="http://cnn.com" style="background-image: url('local.jpg');">CNN</a>' will become '<a href="http://cnn.com" style="background-image: url('denied:local.jpg');">CNN</a>'. *Note*: If URL-accepting attributes other than those listed above are being allowed, then the scheme will not be checked unless the attribute name contains the string 'src' (e.g., 'dynsrc') or starts with 'o' (e.g., 'onbeforecopy'). @@ -1149,7 +1169,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern -- 3.4.8 Inline style properties ----------------------------------o - htmLawed can check URL schemes and dynamic expressions (to guard against Javascript, etc., script-based insecurities) in inline CSS style property values in the 'style' attributes. (CSS properties like 'background-image' that accept URLs in their values are noted in section:- #5.3.) Dynamic CSS expressions that allow scripting in the IE browser, and can be a vulnerability, can be removed from property values by setting '$config["css_expression"]' to '1' (default setting). + htmLawed can check URL schemes and dynamic expressions (to guard against Javascript, etc., script-based insecurities) in inline CSS style property values in the 'style' attributes. (CSS properties like 'background-image' that accept URLs in their values are noted in section:- #5.3.) Dynamic CSS expressions that allow scripting in the IE browser, and can be a vulnerability, can be removed from property values by setting '$config["css_expression"]' to '1' (default setting). Note that when '$config["css_expression"]' is set to '1', htmLawed will remove '/*' from the 'style' values. *Note*: Because of the various ways of representing characters in attribute values (URL-escapement, entitification, etc.), htmLawed might alter the values of the 'style' attribute values, and may even falsely identify dynamic CSS expressions and URL schemes in them. If this is an important issue, checking of URLs and dynamic expressions can be turned off ('$config["schemes"] = "...style:*..."', see section:- #3.4.3, and '$config["css_expression"] = 0'). Alternately, admins can use their own custom function for finer handling of 'style' values through the 'hook_tag' parameter (see section:- #3.4.9). @@ -1163,13 +1183,21 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern It is possible to utilize a custom hook function to alter the tag content htmLawed has finalized (i.e., after it has checked/corrected for required attributes, transformed attributes, lower-cased attribute names, etc.). - When '$config' parameter 'hook_tag' is set to the name of a function, htmLawed (function 'hl_tag()') will pass on the element name, and the `finalized` attribute name-value pairs as array elements to the function. The function is expected to return the full opening tag string like '<element_name attribute_1_name="attribute_1_value"...>' (for empty elements like 'img' and 'input', the element-closing slash '/' should also be included). + When '$config' parameter 'hook_tag' is set to the name of a function, htmLawed (function 'hl_tag()') will pass on the element name, and, in the case of an opening tag, the `finalized` attribute name-value pairs as array elements to the function. The function, after completing a task such as filtering or tag transformation, will typically return an empty string, the full opening tag string like '<element_name attribute_1_name="attribute_1_value"...>' (for empty elements like 'img' and 'input', the element-closing slash '/' should also be included), etc. + + Any 'hook_tag' function, since htmLawed version 1.1.11, also receives names of elements in closing tags, such as 'a' in the closing '</a>' tag of the element '<a href="http://cnn.com">CNN</a>'. Unlike for opening tags, no other value (i.e., the attribute name-value array) is passed to the function since a closing tag contains only element names. Typically, the function will return an empty string or a full closing tag (like '</a>'). This is a *powerful functionality* that can be exploited for various objectives: consolidate-and-convert inline 'style' attributes to 'class', convert 'embed' elements to 'object', permit only one 'caption' element in a 'table' element, disallow embedding of certain types of media, *inject HTML*, use CSSTidy:- http://csstidy.sourceforge.net to sanitize 'style' attribute values, etc. As an example, the custom hook code below can be used to force a series of specifically ordered 'id' attributes on all elements, and a specific 'param' element inside all 'object' elements: - function my_tag_function($element, $attribute_array){ + function my_tag_function($element, $attribute_array=0){ + + // If second argument is not received, it means a closing tag is being handled + if(is_numeric($attribute_array)){ + return "</$element>"; + } + static $id = 0; // Remove any duplicate element if($element == 'param' && isset($attribute_array['allowscriptaccess'])){ @@ -1192,6 +1220,9 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern foreach($attribute_array as $k=>$v){ $string .= " {$k}=\"{$v}\""; } + + static $empty_elements = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); + return "<{$element}{$string}". (isset($in_array($element, $empty_elements) ? ' /' : ''). '>'. $new_element; } @@ -1213,12 +1244,14 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern htmLawed allows an admin to use '$config["safe"]' to auto-adjust multiple '$config' parameters (such as 'elements' which declares the allowed element-set), which otherwise would have to be manually set. The relevant parameters are indicated by '"' in section:- #2.2). Thus, one can pass the '$config' argument with a simpler value. - With the value of '1', htmLawed considers 'CDATA' sections and HTML comments as plain text, and prohibits the 'applet', 'embed', 'iframe', 'object' and 'script' elements, and the 'on*' attributes like 'onclick'. ( There are '$config' parameters like 'css_expression' that are not affected by the value set for 'safe' but whose default values still contribute towards a more `safe` output.) Further, URLs with schemes (see section:- #3.4.3) are neutralized so that, e.g., 'style="moz-binding:url(http://danger)"' becomes 'style="moz-binding:url(denied:http://danger)"' while 'style="moz-binding:url(ok)"' remains intact. + With the value of '1', htmLawed considers 'CDATA' sections and HTML comments as plain text, and prohibits the 'applet', 'embed', 'iframe', 'object' and 'script' elements, and the 'on*' attributes like 'onclick'. ( There are '$config' parameters like 'css_expression' that are not affected by the value set for 'safe' but whose default values still contribute towards a more `safe` output.) Further, URLs with schemes (see section:- #3.4.3) are neutralized so that, e.g., 'style="moz-binding:url(http://danger)"' becomes 'style="moz-binding:url(denied:http://danger)"'. Admins, however, may still want to completely deny the 'style' attribute, e.g., with code like $processed = htmLawed($text, array('safe'=>1, 'deny_attribute'=>'style')); + Permitting the 'style' attribute brings in risks of `click-jacking`, etc. CSS property values can render a page non-functional or be used to deface it. Except for URLs, dynamic expressions, and some other things, htmLawed does not completely check 'style' values. It does provide ways for the code-developer implementing htmLawed to do such checks through the '$spec' argument, and through the 'hook_tag' parameter (see section:- #3.4.8 for more). Disallowing style completely and relying on CSS classes and stylesheet files is recommended. + If a value for a parameter auto-set through 'safe' is still manually provided, then that value can over-ride the auto-set value. E.g., with '$config["safe"] = 1' and '$config["elements"] = "*+script"', 'script', but not 'applet', is allowed. A page illustrating the efficacy of htmLawed's anti-XSS abilities with 'safe' set to '1' against XSS vectors listed by RSnake:- http://ha.ckers.org/xss.html may be available here:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/rsnake/RSnakeXSSTest.htm. @@ -1288,6 +1321,20 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern `Version number - Release date. Notes` + 1.1.11 - 5 June 2012. Fix for possible problem with handling of multi-byte characters in attribute values in an mbstring.func_overload enviroment. '$config["hook_tag"]', if specified, now receives names of elements in closing tags. + + 1.1.10 - 22 October 2011. Fix for a bug in the 'tidy' functionality that caused the entire input to be replaced with a single space; new parameter, '$config["direct_list_nest"]' to allow direct descendance of a list in a list. (5 April 2012. Dual licensing from LGPLv3 to LGPLv3 and GPLv2+.) + + 1.1.9.5 - 6 July 2011. Minor correction of a rule for nesting of 'li' within 'dir' + + 1.1.9.4 - 3 July 2010. Parameter 'schemes' now accepts '!' so any URL, even a local one, can be `denied`. An issue in which a second URL value in 'style' properties was not checked was fixed. + + 1.1.9.3 - 17 May 2010. Checks for correct nesting of 'param' + + 1.1.9.2 - 26 April 2010. Minor fix regarding rendering of denied URL schemes + + 1.1.9.1 - 26 February 2010. htmLawed now uses the LGPL version 3 license; support for 'flashvars' attribute for 'embed' + 1.1.9 - 22 December 2009. Soft-hyphens are now removed only from URL-accepting attribute values 1.1.8.1 - 16 July 2009. Minor code-change to fix a PHP error notice @@ -1336,6 +1383,10 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern Upgrading is as simple as replacing the previous version of 'htmLawed.php' (assuming it was not modified for customized features). As htmLawed output is almost always used in static documents, upgrading should not affect old, finalized content. + *Important* The following upgrades may affect the functionality of a specific htmLawed as indicated by their corresponding notes: + + (1) From version 1.1-1.1.10 to 1.1.11, if a 'hook_tag' function is in use: In version 1.1.11, elements in closing tags (and not just the opening tags) are also passed to the function. There are no attribute names/values to pass, so a 'hook_tag' function receives only the element name. The 'hook_tag' function therefore may have to be edited. See section:- #3.4.9. + Old versions of htmLawed may be available online. E.g., for version 1.0, check http://www.bioinformatics.org/phplabware/downloads/htmLawed1.zip, for 1.1.1, htmLawed111.zip, and for 1.1.10, htmLawed1110.zip. @@ -1382,7 +1433,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern -- 4.10 Acknowledgements ------------------------------------------o - Bryan Blakey, Ulf Harnhammer, Gareth Heyes, Lukasz Pilorz, Shelley Powers, Edward Yang, and many anonymous users. + Nicholas Alipaz, Bryan Blakey, Pádraic Brady, Ulf Harnhammer, Gareth Heyes, Klaus Leithoff, Lukasz Pilorz, Shelley Powers, Edward Yang, and many anonymous users. Thank you! @@ -1446,6 +1497,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern disabled - button, input, optgroup, option, select, textarea enctype - form face - font + flashvars* - embed for - label frame - table frameborder - iframe diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt b/mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt index ea24b1839..793a5a6a7 100644..100755 --- a/mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt +++ b/mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt @@ -1,8 +1,8 @@ /* -htmLawed_TESTCASE.txt, 22 December 2009 -htmLawed 1.1.9, 22 December 2009 +htmLawed_TESTCASE.txt, 22 October 2011 +htmLawed 1.1.11, 5 June 2012 Copyright Santosh Patnaik -GPL v3 license +Dual licensed with LGPL 3 and GPL 2 or later A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed */ @@ -94,6 +94,15 @@ The PHP <s>software</s> script used for this <strike>web-page</strike> webpage i <area href="5" shape="Rect" coords="0,0,118,28"> </map></object> +<param name="name">value</param> + +<object id="obj1"> + <param name="param1"> + <object id="obj2"> + <param name="param2"> + </object> +</object> + <h6>Complex-4: nested and other tables</h6> <table border="1" bgcolor="red"> <tr> <td> Cell </td> <td colspan="2" rowspan="2"> <table border="1" bgcolor="green"> <tr> <td> Cell </td> <td colspan="2" rowspan="2"> </td> </tr> <tr> <td> Cell </td> </tr> <tr> <td> Cell </td> <td> Cell </td> <td> Cell </td> </tr> </table> </td> </tr> <tr> <td> Cell </td> </tr> <tr> <td> Cell </td> <td> Cell </td> <td> Cell </td> </tr> </table><br /> @@ -181,10 +190,13 @@ text <img src="none" alt="none" /> <b>t<em> e <strong> x </strong> t</em></b> <h6>HTML comments (also CDATA)</h6> -Special characters inside: <!-- <![CDATA check ]]> -->, <!-- 3 < 4 > 3.5, & 4 > 4 -->, <!-- che--ck -->, <!--[if !IE]> <--><a>c</a><!--> <![endif]--><br /> -Normal: <!-- check -->, <!--check -->, <em>comment:<!-- check --></em><!-- check -->, <table><!-- check --><tr><td>text not allowed</td></tr></table><br /> -Malformed: <![cdata check ]]>, < ![CDATA check ]]>, < ![CDATA check ] ]><br /> -Invalid: <em <!-- check -->>comment in tag content</em>, <!--check--> +<strong>Script inside:</strong> <!--[if gte IE 4]> +<SCRIPT>alert('XSS');</SCRIPT> +<![endif]--><br /> +<strong>Special characters inside: <!-- <![CDATA check ]]> -->, <!-- 3 < 4 > 3.5, & 4 > 4 -->, <!-- che--ck -->, <!--[if !IE]> <--><a>c</a><!--> <![endif]--><br /> +<strong>Normal:</strong> <!-- check -->, <!--check -->, <em>comment:<!-- check --></em><!-- check -->, <table><!-- check --><tr><td>text not allowed</td></tr></table><br /> +<strong>Malformed:</strong> <![cdata check ]]>, < ![CDATA check ]]>, < ![CDATA check ] ]><br /> +Invalid:</strong> <em <!-- check -->>comment in tag content</em>, <!--check--> <h6>Ins-Del</h6> @@ -224,6 +236,11 @@ Invalid: <em <!-- check -->>comment in tag content</em>, <!--check--> <li>l3</li> <li>l4<ol><li>lo3</li><li>lo4<ol><li>lo5</li></ol></li></ol></li> </ul><br /> +<strong>Nested, directly</strong>: <ul> + <li>l1</li> + <ol>l2</ol> + <li>l3</li> +</ul><br /> <strong>Nested, close-tags omitted</strong>: <ul> <li>l1</li> <li>l2<ol><li>lo1<li>lo2</ol> @@ -242,6 +259,13 @@ Invalid: <em <!-- check -->>comment in tag content</em>, <!--check--> </li></ul> </td></tr></table></li></ol> +<h6>Microdata</h6> + +<div itemscope itemtype="http://data-vocabulary.org/Person"> +I am <span itemprop="name">X</span> but people call me <span itemprop="nickname">Y</span>. +Find me at <a href="http://www.xy.com" itemprop="url">www.xy.com</a> +</div> + <h6>Non-English text-1</h6> Inscrieţi-vă acum la a Zecea Conferinţă Internaţională<br /> @@ -320,7 +344,8 @@ na Alemanha. <strong>Relative and absolute:</strong> <a href="mailto:x"></a>, <a href="http://a.com/b/c/d.f"></a>, <a href="./../d.f"></a>, <a href="./d.f"></a>, <a href="d.f"></a>, <a href="#s"></a>, <a href="./../../d.f#s"></a><br /> (try base URL value of 'http://a.com/b/')<br /> <strong>CSS URLs:</strong> <div style="background-image: url('a.gif');"></div>, <div style="background-image: URL("a.gif");"></div>, <div style="background-image: url('http://a.com/a.gif');"></div>, <div style="background-image: url('./../a.gif');"></div>, <div style="background-image: url('js:xss')"></div><br /> -<strong>Anti-spam:</strong> (try regex for 'http://a.com', etc.) <a href="mailto:x@y.com"></a>, <a href="http://a.com/b@d.f"></a>, <a href="a.com/d.f" rel="nofollow"></a>, <a href="a.com/d.f" rel="1, 2"></a>, <a href="a.com/d.f"></a>, <a href="b.com/d.f"></a>, <a href="c.com/d.f"></a><br /> +<strong>Double URLs:</strong> <a style="behaviour: url(foo) url(http://example.com/xss.htc)">b</a><br /> +<strong>Anti-spam:</strong> (try regex for 'http://a.com', etc.) <a href="mailto:x@y.com"></a>, <a href="http://a.com/b@d.f"></a>, <a href="a.com/d.f" rel="nofollow"></a>, <a href="a.com/d.f" rel="1, 2"></a>, <a href="a.com/d.f"></a>, <a href="b.com/d.f"></a>, <a href="c.com/d.f">, <a href="denied:http://c.com/d.f"></a><br /> <h6>XSS</h6> |