From 2a616ab95e64154175ff3a0008728197406653e3 Mon Sep 17 00:00:00 2001 From: Sem Date: Sun, 1 Jul 2012 17:25:25 +0200 Subject: Fixes #4609. Ugraded htmlawed lib. --- mod/htmlawed/vendors/htmLawed/htmLawed.php | 54 +++--- mod/htmlawed/vendors/htmLawed/htmLawedTest.php | 29 +-- mod/htmlawed/vendors/htmLawed/htmLawed_README.htm | 198 +++++++++++++++------ mod/htmlawed/vendors/htmLawed/htmLawed_README.txt | 84 +++++++-- .../vendors/htmLawed/htmLawed_TESTCASE.txt | 41 ++++- 5 files changed, 290 insertions(+), 116 deletions(-) mode change 100644 => 100755 mod/htmlawed/vendors/htmLawed/htmLawed.php mode change 100644 => 100755 mod/htmlawed/vendors/htmLawed/htmLawedTest.php mode change 100644 => 100755 mod/htmlawed/vendors/htmLawed/htmLawed_README.txt mode change 100644 => 100755 mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt (limited to 'mod/htmlawed/vendors/htmLawed') diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed.php b/mod/htmlawed/vendors/htmLawed/htmLawed.php old mode 100644 new mode 100755 index 2556fdcf2..0d9624961 --- a/mod/htmlawed/vendors/htmLawed/htmLawed.php +++ b/mod/htmlawed/vendors/htmLawed/htmLawed.php @@ -1,9 +1,9 @@ 1, 'http'=>1, 'https'=>1,);} -if(!empty($C['safe']) && empty($C['schemes']['style'])){$C['schemes']['style'] = array('nil'=>1);} +if(!empty($C['safe']) && empty($C['schemes']['style'])){$C['schemes']['style'] = array('!'=>1);} $C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0; if(!isset($C['base_url']) or !preg_match('`^[a-zA-Z\d.+\-]+://[^/]+/(.+?/)?$`', $C['base_url'])){ $C['base_url'] = $C['abs_url'] = 0; @@ -65,6 +65,7 @@ $C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0); $C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char']; $C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0); $C['css_expression'] = empty($C['css_expression']) ? 0 : 1; +$C['direct_list_nest'] = empty($C['direct_list_nest']) ? 
0 : 1; $C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1; $C['hook'] = (!empty($C['hook']) && function_exists($C['hook'])) ? $C['hook'] : 0; $C['hook_tag'] = (!empty($C['hook_tag']) && function_exists($C['hook_tag'])) ? $C['hook_tag'] : 0; @@ -149,14 +150,15 @@ $cI = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'b'=>1, 'bdo'=>1, 'bi $cN = array('a'=>array('a'=>1), 'button'=>array('a'=>1, 'button'=>1, 'fieldset'=>1, 'form'=>1, 'iframe'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'fieldset'=>array('fieldset'=>1), 'form'=>array('form'=>1), 'label'=>array('label'=>1), 'noscript'=>array('script'=>1), 'pre'=>array('big'=>1, 'font'=>1, 'img'=>1, 'object'=>1, 'script'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1), 'rb'=>array('ruby'=>1), 'rt'=>array('ruby'=>1)); // Illegal $cN2 = array_keys($cN); $cR = array('blockquote'=>1, 'dir'=>1, 'dl'=>1, 'form'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1); -$cS = array('colgroup'=>array('col'=>1), 'dir'=>array('li'), 'dl'=>array('dd'=>1, 'dt'=>1), 'menu'=>array('li'=>1), 'ol'=>array('li'=>1), 'optgroup'=>array('option'=>1), 'option'=>array('#pcdata'=>1), 'rbc'=>array('rb'=>1), 'rp'=>array('#pcdata'=>1), 'rtc'=>array('rt'=>1), 'ruby'=>array('rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1), 'select'=>array('optgroup'=>1, 'option'=>1), 'script'=>array('#pcdata'=>1), 'table'=>array('caption'=>1, 'col'=>1, 'colgroup'=>1, 'tfoot'=>1, 'tbody'=>1, 'tr'=>1, 'thead'=>1), 'tbody'=>array('tr'=>1), 'tfoot'=>array('tr'=>1), 'textarea'=>array('#pcdata'=>1), 'thead'=>array('tr'=>1), 'tr'=>array('td'=>1, 'th'=>1), 'ul'=>array('li'=>1)); // Specific - immediate parent-child +$cS = array('colgroup'=>array('col'=>1), 'dir'=>array('li'=>1), 'dl'=>array('dd'=>1, 'dt'=>1), 'menu'=>array('li'=>1), 'ol'=>array('li'=>1), 'optgroup'=>array('option'=>1), 'option'=>array('#pcdata'=>1), 
'rbc'=>array('rb'=>1), 'rp'=>array('#pcdata'=>1), 'rtc'=>array('rt'=>1), 'ruby'=>array('rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1), 'select'=>array('optgroup'=>1, 'option'=>1), 'script'=>array('#pcdata'=>1), 'table'=>array('caption'=>1, 'col'=>1, 'colgroup'=>1, 'tfoot'=>1, 'tbody'=>1, 'tr'=>1, 'thead'=>1), 'tbody'=>array('tr'=>1), 'tfoot'=>array('tr'=>1), 'textarea'=>array('#pcdata'=>1), 'thead'=>array('tr'=>1), 'tr'=>array('td'=>1, 'th'=>1), 'ul'=>array('li'=>1)); // Specific - immediate parent-child +if($GLOBALS['C']['direct_list_nest']){$cS['ol'] = $cS['ul'] += array('ol'=>1, 'ul'=>1);} $cO = array('address'=>array('p'=>1), 'applet'=>array('param'=>1), 'blockquote'=>array('script'=>1), 'fieldset'=>array('legend'=>1, '#pcdata'=>1), 'form'=>array('script'=>1), 'map'=>array('area'=>1), 'object'=>array('param'=>1, 'embed'=>1)); // Other $cT = array('colgroup'=>1, 'dd'=>1, 'dt'=>1, 'li'=>1, 'option'=>1, 'p'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1); // Omitable closing // block/inline type; ins & del both type; #pcdata: text $eB = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'del'=>1, 'dir'=>1, 'dl'=>1, 'div'=>1, 'fieldset'=>1, 'form'=>1, 'ins'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'isindex'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'table'=>1, 'ul'=>1); -$eI = array('#pcdata'=>1, 'a'=>1, 'abbr'=>1, 'acronym'=>1, 'applet'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'br'=>1, 'button'=>1, 'cite'=>1, 'code'=>1, 'del'=>1, 'dfn'=>1, 'em'=>1, 'embed'=>1, 'font'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'kbd'=>1, 'label'=>1, 'map'=>1, 'object'=>1, 'param'=>1, 'q'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'select'=>1, 'script'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1, 'tt'=>1, 'u'=>1, 'var'=>1); +$eI = array('#pcdata'=>1, 'a'=>1, 'abbr'=>1, 'acronym'=>1, 'applet'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'br'=>1, 'button'=>1, 'cite'=>1, 'code'=>1, 'del'=>1, 'dfn'=>1, 'em'=>1, 
'embed'=>1, 'font'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'kbd'=>1, 'label'=>1, 'map'=>1, 'object'=>1, 'q'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'select'=>1, 'script'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1, 'tt'=>1, 'u'=>1, 'var'=>1); $eN = array('a'=>1, 'big'=>1, 'button'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'label'=>1, 'object'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1); // Exclude from specific ele; $cN values -$eO = array('area'=>1, 'caption'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'dt'=>1, 'legend'=>1, 'li'=>1, 'optgroup'=>1, 'option'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'script'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'thead'=>1, 'th'=>1, 'tr'=>1); // Missing in $eB & $eI +$eO = array('area'=>1, 'caption'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'dt'=>1, 'legend'=>1, 'li'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'script'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'thead'=>1, 'th'=>1, 'tr'=>1); // Missing in $eB & $eI $eF = $eB + $eI; // $in sets allowed child @@ -295,20 +297,14 @@ function hl_cmtcd($t){ // comment/CDATA sec handler $t = $t[0]; global $C; -if($t[3] == '-'){ - if(!$C['comment']){return $t;} - if($C['comment'] == 1){return '';} +if(!($v = $C[$n = $t[3] == '-' ? 'comment' : 'cdata'])){return $t;} +if($v == 1){return '';} +if($n == 'comment'){ if(substr(($t = preg_replace('`--+`', '-', substr($t, 4, -3))), -1) != ' '){$t .= ' ';} - $t = $C['comment'] == 2 ? str_replace(array('&', '<', '>'), array('&', '<', '>'), $t) : $t; - $t = "\x01\x02\x04!--$t--\x05\x02\x01"; -}else{ // CDATA - if(!$C['cdata']){return $t;} - if($C['cdata'] == 1){return '';} - $t = substr($t, 1, -1); - $t = $C['cdata'] == 2 ? 
str_replace(array('&', '<', '>'), array('&', '<', '>'), $t) : $t; - $t = "\x01\x01\x04$t\x05\x01\x01"; -} -return str_replace(array('&', '<', '>'), array("\x03", "\x04", "\x05"), $t); +} +else{$t = substr($t, 1, -1);} +$t = $v == 2 ? str_replace(array('&', '<', '>'), array('&', '<', '>'), $t) : $t; +return str_replace(array('&', '<', '>'), array("\x03", "\x04", "\x05"), ($n == 'comment' ? "\x01\x02\x04!--$t--\x05\x02\x01" : "\x01\x01\x04$t\x05\x01\x01")); // eof } @@ -334,9 +330,11 @@ global $C; $b = $a = ''; if($c == null){$c = 'style'; $b = $p[1]; $a = $p[3]; $p = trim($p[2]);} $c = isset($C['schemes'][$c]) ? $C['schemes'][$c] : $C['schemes']['*']; -if(isset($c['*']) or !strcspn($p, '#?;')){return "{$b}{$p}{$a}";} // All ok, frag, query, param +static $d = 'denied:'; +if(isset($c['!']) && substr($p, 0, 7) != $d){$p = "$d$p";} +if(isset($c['*']) or !strcspn($p, '#?;') or (substr($p, 0, 7) == $d)){return "{$b}{$p}{$a}";} // All ok, frag, query, param if(preg_match('`^([a-z\d\-+.&#; ]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $p, $m) && !isset($c[strtolower($m[1])])){ // Denied prot - return "{$b}denied:{$p}{$a}"; + return "{$b}{$d}{$p}{$a}"; } if($C['abs_url']){ if($C['abs_url'] == -1 && strpos($p, $C['base_url']) === 0){ // Make url rel @@ -429,11 +427,11 @@ if($C['make_tag_strict'] && isset($eD[$e])){ // close tag static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty ele if(!empty($m[1])){ - return (!isset($eE[$e]) ? "" : (($C['keep_bad'])%2 ? str_replace(array('<', '>'), array('<', '>'), $t) : '')); + return (!isset($eE[$e]) ? (empty($C['hook_tag']) ? "" : $C['hook_tag']($e)) : (($C['keep_bad'])%2 ? 
str_replace(array('<', '>'), array('<', '>'), $t) : '')); } // open tag & attr -static $aN = array('abbr'=>array('td'=>1, 'th'=>1), 'accept-charset'=>array('form'=>1), 'accept'=>array('form'=>1, 'input'=>1), 'accesskey'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'legend'=>1, 'textarea'=>1), 'action'=>array('form'=>1), 'align'=>array('caption'=>1, 'embed'=>1, 'applet'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'legend'=>1, 'table'=>1, 'hr'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'p'=>1, 'col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'alt'=>array('applet'=>1, 'area'=>1, 'img'=>1, 'input'=>1), 'archive'=>array('applet'=>1, 'object'=>1), 'axis'=>array('td'=>1, 'th'=>1), 'bgcolor'=>array('embed'=>1, 'table'=>1, 'tr'=>1, 'td'=>1, 'th'=>1), 'border'=>array('table'=>1, 'img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'cellpadding'=>array('table'=>1), 'cellspacing'=>array('table'=>1), 'char'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charoff'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charset'=>array('a'=>1, 'script'=>1), 'checked'=>array('input'=>1), 'cite'=>array('blockquote'=>1, 'q'=>1, 'del'=>1, 'ins'=>1), 'classid'=>array('object'=>1), 'clear'=>array('br'=>1), 'code'=>array('applet'=>1), 'codebase'=>array('object'=>1, 'applet'=>1), 'codetype'=>array('object'=>1), 'color'=>array('font'=>1), 'cols'=>array('textarea'=>1), 'colspan'=>array('td'=>1, 'th'=>1), 'compact'=>array('dir'=>1, 'dl'=>1, 'menu'=>1, 'ol'=>1, 'ul'=>1), 'coords'=>array('area'=>1, 'a'=>1), 'data'=>array('object'=>1), 'datetime'=>array('del'=>1, 'ins'=>1), 'declare'=>array('object'=>1), 'defer'=>array('script'=>1), 'dir'=>array('bdo'=>1), 'disabled'=>array('button'=>1, 'input'=>1, 'optgroup'=>1, 'option'=>1, 'select'=>1, 'textarea'=>1), 'enctype'=>array('form'=>1), 
'face'=>array('font'=>1), 'for'=>array('label'=>1), 'frame'=>array('table'=>1), 'frameborder'=>array('iframe'=>1), 'headers'=>array('td'=>1, 'th'=>1), 'height'=>array('embed'=>1, 'iframe'=>1, 'td'=>1, 'th'=>1, 'img'=>1, 'object'=>1, 'applet'=>1), 'href'=>array('a'=>1, 'area'=>1), 'hreflang'=>array('a'=>1), 'hspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'ismap'=>array('img'=>1, 'input'=>1), 'label'=>array('option'=>1, 'optgroup'=>1), 'language'=>array('script'=>1), 'longdesc'=>array('img'=>1, 'iframe'=>1), 'marginheight'=>array('iframe'=>1), 'marginwidth'=>array('iframe'=>1), 'maxlength'=>array('input'=>1), 'method'=>array('form'=>1), 'model'=>array('embed'=>1), 'multiple'=>array('select'=>1), 'name'=>array('button'=>1, 'embed'=>1, 'textarea'=>1, 'applet'=>1, 'select'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'a'=>1, 'input'=>1, 'object'=>1, 'map'=>1, 'param'=>1), 'nohref'=>array('area'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'object'=>array('applet'=>1), 'onblur'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onchange'=>array('input'=>1, 'select'=>1, 'textarea'=>1), 'onfocus'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onreset'=>array('form'=>1), 'onselect'=>array('input'=>1, 'textarea'=>1), 'onsubmit'=>array('form'=>1), 'pluginspage'=>array('embed'=>1), 'pluginurl'=>array('embed'=>1), 'prompt'=>array('isindex'=>1), 'readonly'=>array('textarea'=>1, 'input'=>1), 'rel'=>array('a'=>1), 'rev'=>array('a'=>1), 'rows'=>array('textarea'=>1), 'rowspan'=>array('td'=>1, 'th'=>1), 'rules'=>array('table'=>1), 'scope'=>array('td'=>1, 'th'=>1), 'scrolling'=>array('iframe'=>1), 'selected'=>array('option'=>1), 'shape'=>array('area'=>1, 'a'=>1), 'size'=>array('hr'=>1, 'font'=>1, 'input'=>1, 'select'=>1), 'span'=>array('col'=>1, 'colgroup'=>1), 'src'=>array('embed'=>1, 'script'=>1, 'input'=>1, 'iframe'=>1, 'img'=>1), 'standby'=>array('object'=>1), 
'start'=>array('ol'=>1), 'summary'=>array('table'=>1), 'tabindex'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'object'=>1, 'select'=>1, 'textarea'=>1), 'target'=>array('a'=>1, 'area'=>1, 'form'=>1), 'type'=>array('a'=>1, 'embed'=>1, 'object'=>1, 'param'=>1, 'script'=>1, 'input'=>1, 'li'=>1, 'ol'=>1, 'ul'=>1, 'button'=>1), 'usemap'=>array('img'=>1, 'input'=>1, 'object'=>1), 'valign'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'value'=>array('input'=>1, 'option'=>1, 'param'=>1, 'button'=>1, 'li'=>1), 'valuetype'=>array('param'=>1), 'vspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'width'=>array('embed'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'object'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'applet'=>1, 'col'=>1, 'colgroup'=>1, 'pre'=>1), 'wmode'=>array('embed'=>1), 'xml:space'=>array('pre'=>1, 'script'=>1, 'style'=>1)); // Ele-specific +static $aN = array('abbr'=>array('td'=>1, 'th'=>1), 'accept-charset'=>array('form'=>1), 'accept'=>array('form'=>1, 'input'=>1), 'accesskey'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'legend'=>1, 'textarea'=>1), 'action'=>array('form'=>1), 'align'=>array('caption'=>1, 'embed'=>1, 'applet'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'legend'=>1, 'table'=>1, 'hr'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'p'=>1, 'col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'alt'=>array('applet'=>1, 'area'=>1, 'img'=>1, 'input'=>1), 'archive'=>array('applet'=>1, 'object'=>1), 'axis'=>array('td'=>1, 'th'=>1), 'bgcolor'=>array('embed'=>1, 'table'=>1, 'tr'=>1, 'td'=>1, 'th'=>1), 'border'=>array('table'=>1, 'img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'cellpadding'=>array('table'=>1), 'cellspacing'=>array('table'=>1), 'char'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charoff'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 
'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charset'=>array('a'=>1, 'script'=>1), 'checked'=>array('input'=>1), 'cite'=>array('blockquote'=>1, 'q'=>1, 'del'=>1, 'ins'=>1), 'classid'=>array('object'=>1), 'clear'=>array('br'=>1), 'code'=>array('applet'=>1), 'codebase'=>array('object'=>1, 'applet'=>1), 'codetype'=>array('object'=>1), 'color'=>array('font'=>1), 'cols'=>array('textarea'=>1), 'colspan'=>array('td'=>1, 'th'=>1), 'compact'=>array('dir'=>1, 'dl'=>1, 'menu'=>1, 'ol'=>1, 'ul'=>1), 'coords'=>array('area'=>1, 'a'=>1), 'data'=>array('object'=>1), 'datetime'=>array('del'=>1, 'ins'=>1), 'declare'=>array('object'=>1), 'defer'=>array('script'=>1), 'dir'=>array('bdo'=>1), 'disabled'=>array('button'=>1, 'input'=>1, 'optgroup'=>1, 'option'=>1, 'select'=>1, 'textarea'=>1), 'enctype'=>array('form'=>1), 'face'=>array('font'=>1), 'flashvars'=>array('embed'=>1), 'for'=>array('label'=>1), 'frame'=>array('table'=>1), 'frameborder'=>array('iframe'=>1), 'headers'=>array('td'=>1, 'th'=>1), 'height'=>array('embed'=>1, 'iframe'=>1, 'td'=>1, 'th'=>1, 'img'=>1, 'object'=>1, 'applet'=>1), 'href'=>array('a'=>1, 'area'=>1), 'hreflang'=>array('a'=>1), 'hspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'ismap'=>array('img'=>1, 'input'=>1), 'label'=>array('option'=>1, 'optgroup'=>1), 'language'=>array('script'=>1), 'longdesc'=>array('img'=>1, 'iframe'=>1), 'marginheight'=>array('iframe'=>1), 'marginwidth'=>array('iframe'=>1), 'maxlength'=>array('input'=>1), 'method'=>array('form'=>1), 'model'=>array('embed'=>1), 'multiple'=>array('select'=>1), 'name'=>array('button'=>1, 'embed'=>1, 'textarea'=>1, 'applet'=>1, 'select'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'a'=>1, 'input'=>1, 'object'=>1, 'map'=>1, 'param'=>1), 'nohref'=>array('area'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'object'=>array('applet'=>1), 'onblur'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onchange'=>array('input'=>1, 'select'=>1, 
'textarea'=>1), 'onfocus'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onreset'=>array('form'=>1), 'onselect'=>array('input'=>1, 'textarea'=>1), 'onsubmit'=>array('form'=>1), 'pluginspage'=>array('embed'=>1), 'pluginurl'=>array('embed'=>1), 'prompt'=>array('isindex'=>1), 'readonly'=>array('textarea'=>1, 'input'=>1), 'rel'=>array('a'=>1), 'rev'=>array('a'=>1), 'rows'=>array('textarea'=>1), 'rowspan'=>array('td'=>1, 'th'=>1), 'rules'=>array('table'=>1), 'scope'=>array('td'=>1, 'th'=>1), 'scrolling'=>array('iframe'=>1), 'selected'=>array('option'=>1), 'shape'=>array('area'=>1, 'a'=>1), 'size'=>array('hr'=>1, 'font'=>1, 'input'=>1, 'select'=>1), 'span'=>array('col'=>1, 'colgroup'=>1), 'src'=>array('embed'=>1, 'script'=>1, 'input'=>1, 'iframe'=>1, 'img'=>1), 'standby'=>array('object'=>1), 'start'=>array('ol'=>1), 'summary'=>array('table'=>1), 'tabindex'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'object'=>1, 'select'=>1, 'textarea'=>1), 'target'=>array('a'=>1, 'area'=>1, 'form'=>1), 'type'=>array('a'=>1, 'embed'=>1, 'object'=>1, 'param'=>1, 'script'=>1, 'input'=>1, 'li'=>1, 'ol'=>1, 'ul'=>1, 'button'=>1), 'usemap'=>array('img'=>1, 'input'=>1, 'object'=>1), 'valign'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'value'=>array('input'=>1, 'option'=>1, 'param'=>1, 'button'=>1, 'li'=>1), 'valuetype'=>array('param'=>1), 'vspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'width'=>array('embed'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'object'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'applet'=>1, 'col'=>1, 'colgroup'=>1, 'pre'=>1), 'wmode'=>array('embed'=>1), 'xml:space'=>array('pre'=>1, 'script'=>1, 'style'=>1)); // Ele-specific static $aNE = array('checked'=>1, 'compact'=>1, 'declare'=>1, 'defer'=>1, 'disabled'=>1, 'ismap'=>1, 'multiple'=>1, 'nohref'=>1, 'noresize'=>1, 'noshade'=>1, 'nowrap'=>1, 'readonly'=>1, 'selected'=>1); // Empty static $aNP = array('action'=>1, 'cite'=>1, 
'classid'=>1, 'codebase'=>1, 'data'=>1, 'href'=>1, 'longdesc'=>1, 'model'=>1, 'pluginspage'=>1, 'pluginurl'=>1, 'usemap'=>1); // Need scheme check; excludes style, on* & src static $aNU = array('class'=>array('param'=>1, 'script'=>1), 'dir'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'id'=>array('script'=>1), 'lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'xml:lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'onclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'ondblclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeydown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeypress'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeyup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousedown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousemove'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseout'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseover'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'style'=>array('param'=>1, 'script'=>1), 'title'=>array('param'=>1, 'script'=>1)); // Univ & exceptions @@ -472,8 +470,8 @@ while(strlen($a)){ $aA[$nm] = ''; } break; case 2: // Val - if(preg_match('`^"[^"]*"`', $a, $m) or preg_match("`^'[^']*'`", $a, $m) or preg_match("`^\s*[^\s\"']+`", $a, $m)){ - $m = $m[0]; $w = 1; $mode = 0; $a = 
ltrim(substr_replace($a, '', 0, strlen($m))); + if(preg_match('`^((?:"[^"]*")|(?:\'[^\']*\')|(?:\s*[^\s"\']+))(.*)`', $a, $m)){ + $a = ltrim($m[2]); $m = $m[1]; $w = 1; $mode = 0; $aA[$nm] = trim(($m[0] == '"' or $m[0] == '\'') ? substr($m, 1, -1) : $m); } break; @@ -500,7 +498,7 @@ foreach($aA as $k=>$v){ static $sC = array(' '=>' ', ' '=>' ', 'E'=>'e', 'E'=>'e', 'e'=>'e', 'e'=>'e', 'X'=>'x', 'X'=>'x', 'x'=>'x', 'x'=>'x', 'P'=>'p', 'P'=>'p', 'p'=>'p', 'p'=>'p', 'S'=>'s', 'S'=>'s', 's'=>'s', 's'=>'s', 'I'=>'i', 'I'=>'i', 'i'=>'i', 'i'=>'i', 'O'=>'o', 'O'=>'o', 'o'=>'o', 'o'=>'o', 'N'=>'n', 'N'=>'n', 'n'=>'n', 'n'=>'n', 'U'=>'u', 'U'=>'u', 'u'=>'u', 'u'=>'u', 'R'=>'r', 'R'=>'r', 'r'=>'r', 'r'=>'r', 'L'=>'l', 'L'=>'l', 'l'=>'l', 'l'=>'l', '('=>'(', '('=>'(', ')'=>')', ')'=>')', ' '=>':', ' '=>':', '"'=>'"', '"'=>'"', '''=>"'", '''=>"'", '/'=>'/', '/'=>'/', '*'=>'*', '*'=>'*', '\'=>'\\', '\'=>'\\'); $v = strtr($v, $sC); } - $v = preg_replace_callback('`(url(?:\()(?: )*(?:\'|"|&(?:quot|apos);)?)(.+)((?:\'|"|&(?:quot|apos);)?(?: )*(?:\)))`iS', 'hl_prot', $v); + $v = preg_replace_callback('`(url(?:\()(?: )*(?:\'|"|&(?:quot|apos);)?)(.+?)((?:\'|"|&(?:quot|apos);)?(?: )*(?:\)))`iS', 'hl_prot', $v); $v = !$C['css_expression'] ? preg_replace('`expression`i', ' ', preg_replace('`\\\\\S|(/|(%2f))(\*|(%2a))`i', ' ', $v)) : $v; }elseif(isset($aNP[$k]) or strpos($k, 'src') !== false or $k[0] == 'o'){ $v = str_replace("\xad", ' ', (strpos($v, '&') !== false ? str_replace(array('­', '­', '­'), ' ', $v) : $v)); @@ -643,7 +641,7 @@ return ''; function hl_tidy($t, $w, $p){ // Tidy/compact HTM if(strpos(' pre,script,textarea', "$p,")){return $t;} -$t = str_replace(' ]*(?)\s+`', '`\s+`', '`(<\w[^>]*(?) `'), array(' $1', ' ', '$1'), preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea).*?>)(.+?)()`sm'), create_function('$m', 'return $m[1]. 
str_replace(array("<", ">", "\n", "\r", "\t", " "), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). $m[4];'), $t))); +$t = str_replace(' ]*(?)\s+`', '`\s+`', '`(<\w[^>]*(?) `'), array(' $1', ' ', '$1'), preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)()`sm'), create_function('$m', 'return $m[1]. str_replace(array("<", ">", "\n", "\r", "\t", " "), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). $m[4];'), $t))); if(($w = strtolower($w)) == -1){ return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t); } @@ -688,7 +686,7 @@ return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array( function hl_version(){ // rel -return '1.1.9'; +return '1.1.11'; // eof } diff --git a/mod/htmlawed/vendors/htmLawed/htmLawedTest.php b/mod/htmlawed/vendors/htmLawed/htmLawedTest.php old mode 100644 new mode 100755 index 160bd012d..806aa4641 --- a/mod/htmlawed/vendors/htmLawed/htmLawedTest.php +++ b/mod/htmlawed/vendors/htmLawed/htmLawedTest.php @@ -1,10 +1,10 @@ -Encoding: +Encoding:
@@ -454,6 +456,7 @@ $cfg = array( 'comment'=>array('4', 'nil', 'allow HTML comments', 'nil'), 'css_expression'=>array('2', 'nil', 'allow dynamic expressions in CSS style properties', 'nil'), 'deny_attribute'=>array('1', '0', 'denied attributes', '0', '50', '', 'these'), +'direct_list_nest'=>array('2', 'nil', 'allow direct nesting of a list within another without requiring it to be a list item', 'nil'), 'elements'=>array('', '', 'allowed elements', '50'), 'hexdec_entity'=>array('3', '1', 'convert hexadecimal numeric entities to decimal ones, or vice versa', '0'), 'hook'=>array('', '', 'name of hook function', '25'), @@ -516,23 +519,23 @@ if($do){ } } - if($cfg['anti_link_spam'] && (!empty($cfg['anti_link_spam11']) or !empty($cfg['anti_link_spam12']))){ + if(isset($cfg['anti_link_spam']) && $cfg['anti_link_spam'] && (!empty($cfg['anti_link_spam11']) or !empty($cfg['anti_link_spam12']))){ $cfg['anti_link_spam'] = array($cfg['anti_link_spam11'], $cfg['anti_link_spam12']); } unset($cfg['anti_link_spam11'], $cfg['anti_link_spam12']); - if($cfg['anti_mail_spam'] == 1){ + if(isset($cfg['anti_mail_spam']) && $cfg['anti_mail_spam'] == 1){ $cfg['anti_mail_spam'] = isset($cfg['anti_mail_spam1'][0]) ? $cfg['anti_mail_spam1'] : 0; } unset($cfg['anti_mail_spam11']); - if($cfg['deny_attribute'] == 1){ + if(isset($cfg['deny_attribute']) && $cfg['deny_attribute'] == 1){ $cfg['deny_attribute'] = isset($cfg['deny_attribute1'][0]) ? $cfg['deny_attribute1'] : 0; } unset($cfg['deny_attribute1']); - if($cfg['tidy'] == 2){ + if(isset($cfg['tidy']) && $cfg['tidy'] == 2){ $cfg['tidy'] = isset($cfg['tidy2'][0]) ? $cfg['tidy2'] : 0; } unset($cfg['tidy2']); - if($cfg['unique_ids'] == 2){ + if(isset($cfg['unique_ids']) && $cfg['unique_ids'] == 2){ $cfg['unique_ids'] = isset($cfg['unique_ids2'][0]) ? 
$cfg['unique_ids2'] : 1; } unset($cfg['unique_ids2']); @@ -540,9 +543,9 @@ if($do){ $cfg['show_setting'] = 'hlcfg'; $st = microtime(); - $out = htmLawed($_POST['text'], $cfg, str_replace(array('$', '{'), '', $_POST['spec'])); + $out = htmLawed($_POST['text'], $cfg, $_POST['spec']); $et = microtime(); - echo '
Input code » ', strlen($_POST['text']), ' chars, ~', round((substr_count($_POST['text'], '>') + substr_count($_POST['text'], '<'))/2), ' tags ', (!isset($_POST['text'][$_hlimit]) ? ' Input binary » ' : ''), ' Finalized internal settings »  ', '
Output » htmLawed processing time ', number_format(((substr($et,0,9)) + (substr($et,-10)) - (substr($st,0,9)) - (substr($st,-10))),4), ' s', (($mem = memory_get_peak_usage()) !== false ? ', peak memory usage '. round(($mem-$pre_mem)/1048576, 2). ' MB' : ''), '
'; + echo '
Input code » ', strlen($_POST['text']), ' chars, ~', ($tag = round((substr_count($_POST['text'], '>') + substr_count($_POST['text'], '<'))/2)), ' tag', ($tag > 1 ? 's' : ''), ' ', (!isset($_POST['text'][$_hlimit]) ? ' Input binary » ' : ''), ' Finalized internal settings »  ', '
Output » htmLawed processing time ', number_format(((substr($et,0,9)) + (substr($et,-10)) - (substr($st,0,9)) - (substr($st,-10))),4), ' s', (($mem = memory_get_peak_usage()) !== false ? ', peak memory usage '. round(($mem-$pre_mem)/1048576, 2). ' MB' : ''), '
'; if($_w3c_validate && $validation) { ?> diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed_README.htm b/mod/htmlawed/vendors/htmLawed/htmLawed_README.htm index 7138ee9c0..6dd78fb2e 100644 --- a/mod/htmlawed/vendors/htmLawed/htmLawed_README.htm +++ b/mod/htmlawed/vendors/htmLawed/htmLawed_README.htm @@ -7,40 +7,74 @@ htmLawed documentation | htmLawed PHP software is a free, open-source, customizable HTML input purifier and filter @@ -110,10 +144,10 @@ span.totop a, span.totop a:visited {color: #6699cc;}

-
htmLawed_README.txt, 22 December 2009
-htmLawed 1.1.9, 22 December 2009
+
htmLawed_README.txt, 8 June 2012
+htmLawed 1.1.11, 5 June 2012
Copyright Santosh Patnaik
-GPL v3 license
+Dual licensed with LGPL 3 and GPL 2 or later
A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed 

@@ -222,7 +256,7 @@ A PHP Labware internal utility - 1.4  License & copyright (to top)

-  htmLawed is free and open-source software licensed under GPL license version 3, and copyrighted by Santosh Patnaik, MD, PhD.
+  htmLawed is free and open-source software dual licensed under LGPL license version 3 and GPL license version 2 or later, and copyrighted by Santosh Patnaik, MD, PhD.

@@ -254,9 +288,11 @@ A PHP Labware internal utility - 2  Usage

(to top)

-  htmLawed should work with PHP 4.3 and higher. Either include() the htmLawed.php file or copy-paste the entire code.
+  htmLawed should work with PHP 4.4 and higher. Either include() the htmLawed.php file or copy-paste the entire code.

  To easily test htmLawed using a form-based interface, use the provided demo (htmLawed.php and htmLawedTest.php should be in the same directory on the web-server).
+
Note: For code for usage of the htmLawed class (for htmLawed in OOP), please refer to the htmLawed website; the filtering itself can be configured, etc., as described here.

2.1  Simple @@ -371,6 +407,12 @@ A PHP Labware internal utility - string - dictated by values in string
  on* (like onfocus) attributes not allowed - "

direct_list_nest
+  Allow direct nesting of a list within another without requiring it to be a list item; see
section 3.3.4
+
0 - no  *
1 - yes
+
  elements
  Allowed HTML elements; see section 3.3

@@ -441,11 +483,11 @@ A PHP Labware internal utility - 1 - will auto-adjust other relevant $config parameters (indicated by " in this list)

  schemes
-  Array of attribute-specific, comma-separated, lower-cased list of schemes (protocols) allowed in attributes accepting URLs; * covers all unspecified attributes; see
section 3.4.3
+  Array of attribute-specific, comma-separated, lower-cased list of schemes (protocols) allowed in attributes accepting URLs (or ! to deny any URL); * covers all unspecified attributes; see section 3.4.3

  href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https  *
  *: ftp, gopher, http, https, mailto, news, nntp, telnet  ^
href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; style: nil; *:file, http, https  "
href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; style: !; *:file, http, https  "

  show_setting
  Name of a PHP variable to assign the finalized $config and $spec values; see section 3.8
@@ -541,7 +583,7 @@ A PHP Labware internal utility - input=title(), value(maxval=8/default=6)
  Output: <input title="WIDTH" value="6" /><input title="length" value="5" />

Rule: input=title(nomatch=$w.d$i), value(match=$em$/default=6em)
Rule: input=title(nomatch=%w.d%i), value(match=%em%/default=6em)
  Output: <input value="10em" /><input title="length" value="6em" />

  Rule: input=title(oneof=height|depth/default=depth), value(noneof=5|6)
@@ -565,9 +607,9 @@ A PHP Labware internal utility -
2.5  Some security risks to keep in mind

(to top)

-  When setting the parameters/arguments (like those to allow certain HTML elements) for use with htmLawed, one should bear in mind that the setting may let through potentially dangerous HTML code. (This may not be a problem if the authors are trusted.)
+  When setting the parameters/arguments (like those to allow certain HTML elements) for use with htmLawed, one should bear in mind that the setting may let through potentially dangerous HTML code which is meant to steal user-data, deface a website, render a page non-functional, etc.

-  For example, following increase security risks:
+  Unless end-users, either people or software, supplying the content are completely trusted, security issues arising from the degree of HTML usage permission have to be kept in mind. For example, the following increase security risks:

  *  Allowing script, applet, embed, iframe or object elements, or certain of their attributes like allowscriptaccess

@@ -575,7 +617,13 @@ A PHP Labware internal utility - $config appropriately. E.g., $config["elements"] = "* -script" (section 3.3), $config["safe"] = 1 (section 3.6), etc.
+  *  Allowing the style attribute
+
+  To remove insecure HTML, code-developers using htmLawed must set $config appropriately. E.g., $config["elements"] = "* -script" to deny the script element (section 3.3), $config["safe"] = 1 to auto-configure certain htmLawed parameters for maximizing security (section 3.6), etc.
+
+  Permitting the *style* attribute brings in risks of click-jacking, phishing, web-page overlays, etc., even when the safe parameter is enabled (see section 3.6). Except for URLs and a few other things like CSS dynamic expressions, htmLawed currently does not check every CSS style property. It does provide ways for the code-developer implementing htmLawed to do such checks through htmLawed's $spec argument, and through the hook_tag parameter (see section 3.4.8 for more). Disallowing style completely and relying on CSS classes and stylesheet files is recommended.
+
+  htmLawed does not check or correct the character encoding of the input it receives. In conjunction with permitting circumstances such as when the character encoding is left undefined through HTTP headers or HTML meta tags, this can permit an exploit (like Google's UTF-7/XSS vulnerability of the past).

@@ -722,6 +770,8 @@ A PHP Labware internal utility - section 3.1).

+  *  htmLawed does not check or correct the character encoding of the input it receives. In conjunction with permitting circumstances such as when the character encoding is left undefined through HTTP headers or HTML meta tags, this can permit an exploit (like Google's UTF-7/XSS vulnerability of the past).
+
  *  Like any script using PHP's PCRE regex functions, PHP setup-specific low PCRE limit values can cause htmLawed to at least partially fail with very long input texts.

@@ -1162,6 +1212,8 @@ A PHP Labware internal utility - table can have 0 or 1 caption, tbody, tfoot, and thead, but they must be in this order: caption, thead, tfoot, tbody.

  htmLawed currently does not check for conformance to these rules. Note that any non-compliance in this regard will not introduce security vulnerabilities, crash browser applications, or affect the rendering of web-pages.
+
+  With $config["direct_list_nest"] set to 1, htmLawed will allow direct nesting of an ol or ul list within another ol or ul without requiring the child list to be within an li of the parent list. While this is not standard-compliant, directly nested lists are rendered properly by almost all browsers. The parameter $config["direct_list_nest"] has no effect if tag-balancing (
section 3.3.3) is turned off.

@@ -1271,6 +1323,8 @@ A PHP Labware internal utility - style: * useful as URLs in style attributes can be specified in a variety of ways, and the patterns that htmLawed uses to identify URLs may mistakenly identify non-URL text.

! can be put in the list of schemes to disallow all protocols as well as local URLs. Thus, with href: http, style: !, '<a href="http://cnn.com" style="background-image: url('local.jpg');">CNN</a>' will become '<a href="http://cnn.com" style="background-image: url('denied:local.jpg');">CNN</a>'.
+
  Note: If URL-accepting attributes other than those listed above are being allowed, then the scheme will not be checked unless the attribute name contains the string src (e.g., dynsrc) or starts with o (e.g., onbeforecopy).

  With $config["safe"] = 1, all URLs are disallowed in the style attribute values.
@@ -1488,7 +1542,7 @@ A PHP Labware internal utility -
3.4.8  Inline style properties

(to top)

-  htmLawed can check URL schemes and dynamic expressions (to guard against Javascript, etc., script-based insecurities) in inline CSS style property values in the style attributes. (CSS properties like background-image that accept URLs in their values are noted in section 5.3.) Dynamic CSS expressions that allow scripting in the IE browser, and can be a vulnerability, can be removed from property values by setting $config["css_expression"] to 1 (default setting).
+  htmLawed can check URL schemes and dynamic expressions (to guard against Javascript, etc., script-based insecurities) in inline CSS style property values in the style attributes. (CSS properties like background-image that accept URLs in their values are noted in section 5.3.) Dynamic CSS expressions that allow scripting in the IE browser, and can be a vulnerability, can be removed from property values by setting $config["css_expression"] to 1 (default setting). Note that when $config["css_expression"] is set to 1, htmLawed will remove /* from the style values.

  Note: Because of the various ways of representing characters in attribute values (URL-escapement, entitification, etc.), htmLawed might alter the values of the style attribute values, and may even falsely identify dynamic CSS expressions and URL schemes in them. If this is an important issue, checking of URLs and dynamic expressions can be turned off ($config["schemes"] = "...style:*...", see section 3.4.3, and $config["css_expression"] = 0). Alternately, admins can use their own custom function for finer handling of style values through the hook_tag parameter (see section 3.4.9).

@@ -1503,14 +1557,30 @@ A PHP Labware internal utility - $config parameter hook_tag is set to the name of a function, htmLawed (function hl_tag()) will pass on the element name, and the finalized attribute name-value pairs as array elements to the function. The function is expected to return the full opening tag string like <element_name attribute_1_name="attribute_1_value"...> (for empty elements like img and input, the element-closing slash / should also be included).
+  When $config parameter hook_tag is set to the name of a function, htmLawed (function hl_tag()) will pass on the element name, and, in the case of an opening tag, the finalized attribute name-value pairs as array elements to the function. The function, after completing a task such as filtering or tag transformation, will typically return an empty string, the full opening tag string like <element_name attribute_1_name="attribute_1_value"...> (for empty elements like img and input, the element-closing slash / should also be included), etc.
+
+  Any hook_tag function, since htmLawed version 1.1.11, also receives names of elements in closing tags, such as a in the closing </a> tag of the element <a href="http://cnn.com">CNN</a>. Unlike for opening tags, no other value (i.e., the attribute name-value array) is passed to the function since a closing tag contains only element names. Typically, the function will return an empty string or a full closing tag (like </a>).

  This is a powerful functionality that can be exploited for various objectives: consolidate-and-convert inline style attributes to class, convert embed elements to object, permit only one caption element in a table element, disallow embedding of certain types of media, inject HTML, use
CSSTidy to sanitize style attribute values, etc.

  As an example, the custom hook code below can be used to force a series of specifically ordered id attributes on all elements, and a specific param element inside all object elements:

-    function my_tag_function($element, $attribute_array){ +    function my_tag_function($element, $attribute_array=0){ +
+
+ +      // If second argument is not received, it means a closing tag is being handled +
+ +      if(is_numeric($attribute_array)){ +
+ +        return "</$element>"; +
+ +      } +

      static $id = 0; @@ -1570,6 +1640,11 @@ A PHP Labware internal utility -       }
+
+ +      static $empty_elements = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); +
+
      return "<{$element}{$string}". (isset($empty_elements[$element]) ? ' /' : ''). '>'. $new_element;
@@ -1598,7 +1673,7 @@ A PHP Labware internal utility -
$config["safe"] to auto-adjust multiple $config parameters (such as elements which declares the allowed element-set), which otherwise would have to be manually set. The relevant parameters are indicated by " in section 2.2). Thus, one can pass the $config argument with a simpler value.

-  With the value of 1, htmLawed considers CDATA sections and HTML comments as plain text, and prohibits the applet, embed, iframe, object and script elements, and the on* attributes like onclick. ( There are $config parameters like css_expression that are not affected by the value set for safe but whose default values still contribute towards a more safe output.) Further, URLs with schemes (see section 3.4.3) are neutralized so that, e.g., style="moz-binding:url(http://danger)" becomes style="moz-binding:url(denied:http://danger)" while style="moz-binding:url(ok)" remains intact.
+  With the value of 1, htmLawed considers CDATA sections and HTML comments as plain text, and prohibits the applet, embed, iframe, object and script elements, and the on* attributes like onclick. ( There are $config parameters like css_expression that are not affected by the value set for safe but whose default values still contribute towards a more safe output.) Further, URLs with schemes (see section 3.4.3) are neutralized so that, e.g., style="moz-binding:url(http://danger)" becomes style="moz-binding:url(denied:http://danger)".

  Admins, however, may still want to completely deny the style attribute, e.g., with code like

@@ -1606,6 +1681,8 @@ A PHP Labware internal utility -     $processed = htmLawed($text, array('safe'=>1, 'deny_attribute'=>'style'));

+  Permitting the style attribute brings in risks of click-jacking, etc. CSS property values can render a page non-functional or be used to deface it. Except for URLs, dynamic expressions, and some other things, htmLawed does not completely check style values. It does provide ways for the code-developer implementing htmLawed to do such checks through the $spec argument, and through the hook_tag parameter (see
section 3.4.8 for more). Disallowing style completely and relying on CSS classes and stylesheet files is recommended.
+
  If a value for a parameter auto-set through safe is still manually provided, then that value can over-ride the auto-set value. E.g., with $config["safe"] = 1 and $config["elements"] = "*+script", script, but not applet, is allowed.

  A page illustrating the efficacy of htmLawed's anti-XSS abilities with safe set to 1 against XSS vectors listed by RSnake may be available here.
@@ -1688,6 +1765,20 @@ A PHP Labware internal utility - $config["hook_tag"], if specified, now receives names of elements in closing tags.
+
+  1.1.10 - 22 October 2011. Fix for a bug in the tidy functionality that caused the entire input to be replaced with a single space; new parameter, $config["direct_list_nest"] to allow direct descendance of a list in a list. (5 April 2012. Dual licensing from LGPLv3 to LGPLv3 and GPLv2+.)
+
+  1.1.9.5 - 6 July 2011. Minor correction of a rule for nesting of li within dir
+
+  1.1.9.4 - 3 July 2010. Parameter schemes now accepts ! so any URL, even a local one, can be denied. An issue in which a second URL value in style properties was not checked was fixed.
+
+  1.1.9.3 - 17 May 2010. Checks for correct nesting of param
+
+  1.1.9.2 - 26 April 2010. Minor fix regarding rendering of denied URL schemes
+
+  1.1.9.1 - 26 February 2010. htmLawed now uses the LGPL version 3 license; support for flashvars attribute for embed
+
  1.1.9 - 22 December 2009. Soft-hyphens are now removed only from URL-accepting attribute values

  1.1.8.1 - 16 July 2009. Minor code-change to fix a PHP error notice
@@ -1738,6 +1829,10 @@ A PHP Labware internal utility -
htmLawed.php (assuming it was not modified for customized features). As htmLawed output is almost always used in static documents, upgrading should not affect old, finalized content.

Important  The following upgrades may affect the functionality of a specific htmLawed as indicated by their corresponding notes:
+
+  (1) From version 1.1-1.1.10 to 1.1.11, if a hook_tag function is in use: In version 1.1.11, elements in closing tags (and not just the opening tags) are also passed to the function. There are no attribute names/values to pass, so a hook_tag function receives only the element name. The hook_tag function therefore may have to be edited. See
section 3.4.9.
+
  Old versions of htmLawed may be available online. E.g., for version 1.0, check http://www.bioinformatics.org/phplabware/downloads/htmLawed1.zip, for 1.1.1, htmLawed111.zip, and for 1.1.10, htmLawed1110.zip.
@@ -1789,7 +1884,7 @@ A PHP Labware internal utility - 4.10  Acknowledgements (to top)

-  Bryan Blakey, Ulf Harnhammer, Gareth Heyes, Lukasz Pilorz, Shelley Powers, Edward Yang, and many anonymous users.
+  Nicholas Alipaz, Bryan Blakey, Pádraic Brady, Ulf Harnhammer, Gareth Heyes, Klaus Leithoff, Lukasz Pilorz, Shelley Powers, Edward Yang, and many anonymous users.

  Thank you!
@@ -1856,6 +1951,7 @@ A PHP Labware internal utility -

-


HTM version of
htmLawed_README.txt generated on 22 Dec, 2009 using rTxt2htm from PHP Labware +


HTM version of htmLawed_README.txt generated on 06 Jun, 2012 using rTxt2htm from PHP Labware
diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed_README.txt b/mod/htmlawed/vendors/htmLawed/htmLawed_README.txt old mode 100644 new mode 100755 index 48a67009b..e4027e465 --- a/mod/htmlawed/vendors/htmLawed/htmLawed_README.txt +++ b/mod/htmlawed/vendors/htmLawed/htmLawed_README.txt @@ -1,8 +1,8 @@ /* -htmLawed_README.txt, 22 December 2009 -htmLawed 1.1.9, 22 December 2009 +htmLawed_README.txt, 8 June 2012 +htmLawed 1.1.11, 5 June 2012 Copyright Santosh Patnaik -GPL v3 license +Dual licensed with LGPL 3 and GPL 2 or later A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed */ @@ -171,7 +171,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern -- 1.4 License & copyright ----------------------------------------o - htmLawed is free and open-source software licensed under GPL license version 3:- http://www.gnu.org/licenses/gpl-3.0.txt, and copyrighted by Santosh Patnaik, MD, PhD. + htmLawed is free and open-source software dual licensed under LGPL license version 3:- http://www.gnu.org/licenses/lgpl-3.0.txt and GPL license version 2:- http://www.gnu.org/licenses/gpl-2.0.txt or later, and copyrighted by Santosh Patnaik, MD, PhD. -- 1.5 Terms used here --------------------------------------------o @@ -200,9 +200,11 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern == 2 Usage ========================================================oo - htmLawed should work with PHP 4.3 and higher. Either 'include()' the 'htmLawed.php' file or copy-paste the entire code. + htmLawed should work with PHP 4.4 and higher. Either 'include()' the 'htmLawed.php' file or copy-paste the entire code. To easily *test* htmLawed using a form-based interface, use the provided demo:- htmLawedTest.php ('htmLawed.php' and 'htmLawedTest.php' should be in the same directory on the web-server). 
+ + *Note*: For code for usage of the htmLawed class (for htmLawed in OOP), please refer to the htmLawed:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed website; the filtering itself can be configured, etc., as described here. -- 2.1 Simple ------------------------------------------------------ @@ -305,6 +307,12 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern '0' - none * 'string' - dictated by values in 'string' 'on*' (like 'onfocus') attributes not allowed - " + + *direct_list_nest* + Allow direct nesting of a list within another without requiring it to be a list item; see section:- #3.3.4 + + '0' - no * + '1' - yes *elements* Allowed HTML elements; see section:- #3.3 @@ -376,11 +384,11 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern '1' - will auto-adjust other relevant '$config' parameters (indicated by '"' in this list) *schemes* - Array of attribute-specific, comma-separated, lower-cased list of schemes (protocols) allowed in attributes accepting URLs; '*' covers all unspecified attributes; see section:- #3.4.3 + Array of attribute-specific, comma-separated, lower-cased list of schemes (protocols) allowed in attributes accepting URLs (or '!' 
to `deny` any URL); '*' covers all unspecified attributes; see section:- #3.4.3 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https' * '*: ftp, gopher, http, https, mailto, news, nntp, telnet' ^ - 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; style: nil; *:file, http, https' " + 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; style: !; *:file, http, https' " *show_setting* Name of a PHP variable to assign the `finalized` '$config' and '$spec' values; see section:- #3.8 @@ -469,7 +477,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern `Rule`: 'input=title(), value(maxval=8/default=6)' `Output`: '' - `Rule`: 'input=title(nomatch=$w.d$i), value(match=$em$/default=6em)' + `Rule`: 'input=title(nomatch=%w.d%i), value(match=%em%/default=6em)' `Output`: '' `Rule`: 'input=title(oneof=height|depth/default=depth), value(noneof=5|6)' @@ -491,17 +499,23 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern -- 2.5 Some security risks to keep in mind ------------------------o - When setting the parameters/arguments (like those to allow certain HTML elements) for use with htmLawed, one should bear in mind that the setting may let through potentially `dangerous` HTML code. (This may not be a problem if the authors are trusted.) + When setting the parameters/arguments (like those to allow certain HTML elements) for use with htmLawed, one should bear in mind that the setting may let through potentially `dangerous` HTML code which is meant to steal user-data, deface a website, render a page non-functional, etc. - For example, following increase security risks: + Unless end-users, either people or software, supplying the content are completely trusted, security issues arising from the degree of HTML usage permission has to be kept in mind. 
For example, following increase security risks: * Allowing 'script', 'applet', 'embed', 'iframe' or 'object' elements, or certain of their attributes like 'allowscriptaccess' * Allowing HTML comments (some Internet Explorer versions are vulnerable with, e.g., '' * Allowing dynamic CSS expressions (a feature of the IE browser) + + * Allowing the 'style' attribute - `Unsafe` HTML can be removed by setting '$config' appropriately. E.g., '$config["elements"] = "* -script"' (section:- #3.3), '$config["safe"] = 1' (section:- #3.6), etc. + To remove `unsecure` HTML, code-developers using htmLawed must set '$config' appropriately. E.g., '$config["elements"] = "* -script"' to deny the 'script' element (section:- #3.3), '$config["safe"] = 1' to auto-configure ceratin htmLawed parameters for maximizing security (section:- #3.6), etc. + + Permitting the '*style*' attribute brings in risks of `click-jacking`, `phishing`, web-page overlays, etc., `even` when the 'safe' parameter is enabled (see section:- #3.6). Except for URLs and a few other things like CSS dynamic expressions, htmLawed currently does not check every CSS style property. It does provide ways for the code-developer implementing htmLawed to do such checks through htmLawed's '$spec' argument, and through the 'hook_tag' parameter (see section:- #3.4.8 for more). Disallowing 'style' completely and relying on CSS classes and stylesheet files is recommended. + + htmLawed does not check or correct the character *encoding* of the input it receives. In conjunction with permitting circumstances such as when the character encoding is left undefined through HTTP headers or HTML 'meta' tags, this can permit an exploit (like Google's UTF-7/XSS vulnerability of the past). 
-- 2.6 Use without modifying old 'kses()' code --------------------o @@ -614,6 +628,8 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern * htmLawed does not correct certain possible attribute-based security vulnerabilities (e.g., 'x'). These arise when browsers mis-identify markup in `escaped` text, defeating the very purpose of escaping text (a bad browser will read the given example as 'x'). * Because of poor Unicode support in PHP, htmLawed does not remove the `high value` HTML-invalid characters with multi-byte code-points. Such characters however are extremely unlikely to be in the input. (see section:- #3.1). + + * htmLawed does not check or correct the character encoding of the input it receives. In conjunction with permitting circumstances such as when the character encoding is left undefined through HTTP headers or HTML 'meta' tags, this can permit an exploit (like Google's UTF-7/XSS vulnerability of the past). * Like any script using PHP's PCRE regex functions, PHP setup-specific low PCRE limit values can cause htmLawed to at least partially fail with very long input texts. @@ -925,6 +941,8 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern In some cases, the specs stipulate the number and/or the ordering of the child elements. A 'table' can have 0 or 1 'caption', 'tbody', 'tfoot', and 'thead', but they must be in this order: 'caption', 'thead', 'tfoot', 'tbody'. htmLawed currently does not check for conformance to these rules. Note that any non-compliance in this regard will not introduce security vulnerabilities, crash browser applications, or affect the rendering of web-pages. + + With '$config["direct_list_nest"]' set to '1', htmLawed will allow direct nesting of an 'ol' or 'ul' list within another 'ol' or 'ul' without requiring the child list to be within an 'li' of the parent list. 
While this is not standard-compliant, directly nested lists are rendered properly by almost all browsers. The parameter '$config["direct_list_nest"]' has no effect if tag-balancing (section:- #3.3.3) is turned off. -- 3.3.5 Beautify or compact HTML ---------------------------------o @@ -1020,6 +1038,8 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern Thus, `to allow Javascript`, one can set '$config["schemes"]' as 'href: mailto, http, https; *: http, https, javascript', or 'href: mailto, http, https, javascript; *: http, https, javascript', or '*: *', and so on. As a side-note, one may find 'style: *' useful as URLs in 'style' attributes can be specified in a variety of ways, and the patterns that htmLawed uses to identify URLs may mistakenly identify non-URL text. + + '!' can be put in the list of schemes to disallow all protocols as well as `local` URLs. Thus, with 'href: http, style: !', 'CNN' will become 'CNN'. *Note*: If URL-accepting attributes other than those listed above are being allowed, then the scheme will not be checked unless the attribute name contains the string 'src' (e.g., 'dynsrc') or starts with 'o' (e.g., 'onbeforecopy'). @@ -1149,7 +1169,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern -- 3.4.8 Inline style properties ----------------------------------o - htmLawed can check URL schemes and dynamic expressions (to guard against Javascript, etc., script-based insecurities) in inline CSS style property values in the 'style' attributes. (CSS properties like 'background-image' that accept URLs in their values are noted in section:- #5.3.) Dynamic CSS expressions that allow scripting in the IE browser, and can be a vulnerability, can be removed from property values by setting '$config["css_expression"]' to '1' (default setting). 
+ htmLawed can check URL schemes and dynamic expressions (to guard against Javascript, etc., script-based insecurities) in inline CSS style property values in the 'style' attributes. (CSS properties like 'background-image' that accept URLs in their values are noted in section:- #5.3.) Dynamic CSS expressions that allow scripting in the IE browser, and can be a vulnerability, can be removed from property values by setting '$config["css_expression"]' to '1' (default setting). Note that when '$config["css_expression"]' is set to '1', htmLawed will remove '/*' from the 'style' values. *Note*: Because of the various ways of representing characters in attribute values (URL-escapement, entitification, etc.), htmLawed might alter the values of the 'style' attribute values, and may even falsely identify dynamic CSS expressions and URL schemes in them. If this is an important issue, checking of URLs and dynamic expressions can be turned off ('$config["schemes"] = "...style:*..."', see section:- #3.4.3, and '$config["css_expression"] = 0'). Alternately, admins can use their own custom function for finer handling of 'style' values through the 'hook_tag' parameter (see section:- #3.4.9). @@ -1163,13 +1183,21 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern It is possible to utilize a custom hook function to alter the tag content htmLawed has finalized (i.e., after it has checked/corrected for required attributes, transformed attributes, lower-cased attribute names, etc.). - When '$config' parameter 'hook_tag' is set to the name of a function, htmLawed (function 'hl_tag()') will pass on the element name, and the `finalized` attribute name-value pairs as array elements to the function. The function is expected to return the full opening tag string like '' (for empty elements like 'img' and 'input', the element-closing slash '/' should also be included). 
+ When '$config' parameter 'hook_tag' is set to the name of a function, htmLawed (function 'hl_tag()') will pass on the element name, and, in the case of an opening tag, the `finalized` attribute name-value pairs as array elements to the function. The function, after completing a task such as filtering or tag transformation, will typically return an empty string, the full opening tag string like '' (for empty elements like 'img' and 'input', the element-closing slash '/' should also be included), etc. + + Any 'hook_tag' function, since htmLawed version 1.1.11, also receives names of elements in closing tags, such as 'a' in the closing '' tag of the element 'CNN'. Unlike for opening tags, no other value (i.e., the attribute name-value array) is passed to the function since a closing tag contains only element names. Typically, the function will return an empty string or a full closing tag (like ''). This is a *powerful functionality* that can be exploited for various objectives: consolidate-and-convert inline 'style' attributes to 'class', convert 'embed' elements to 'object', permit only one 'caption' element in a 'table' element, disallow embedding of certain types of media, *inject HTML*, use CSSTidy:- http://csstidy.sourceforge.net to sanitize 'style' attribute values, etc. 
As an example, the custom hook code below can be used to force a series of specifically ordered 'id' attributes on all elements, and a specific 'param' element inside all 'object' elements: - function my_tag_function($element, $attribute_array){ + function my_tag_function($element, $attribute_array=0){ + + // If second argument is not received, it means a closing tag is being handled + if(is_numeric($attribute_array)){ + return ""; + } + static $id = 0; // Remove any duplicate element if($element == 'param' && isset($attribute_array['allowscriptaccess'])){ @@ -1192,6 +1220,9 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern foreach($attribute_array as $k=>$v){ $string .= " {$k}=\"{$v}\""; } + + static $empty_elements = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); + return "<{$element}{$string}". (isset($in_array($element, $empty_elements) ? ' /' : ''). '>'. $new_element; } @@ -1213,12 +1244,14 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern htmLawed allows an admin to use '$config["safe"]' to auto-adjust multiple '$config' parameters (such as 'elements' which declares the allowed element-set), which otherwise would have to be manually set. The relevant parameters are indicated by '"' in section:- #2.2). Thus, one can pass the '$config' argument with a simpler value. - With the value of '1', htmLawed considers 'CDATA' sections and HTML comments as plain text, and prohibits the 'applet', 'embed', 'iframe', 'object' and 'script' elements, and the 'on*' attributes like 'onclick'. ( There are '$config' parameters like 'css_expression' that are not affected by the value set for 'safe' but whose default values still contribute towards a more `safe` output.) 
Further, URLs with schemes (see section:- #3.4.3) are neutralized so that, e.g., 'style="moz-binding:url(http://danger)"' becomes 'style="moz-binding:url(denied:http://danger)"' while 'style="moz-binding:url(ok)"' remains intact. + With the value of '1', htmLawed considers 'CDATA' sections and HTML comments as plain text, and prohibits the 'applet', 'embed', 'iframe', 'object' and 'script' elements, and the 'on*' attributes like 'onclick'. ( There are '$config' parameters like 'css_expression' that are not affected by the value set for 'safe' but whose default values still contribute towards a more `safe` output.) Further, URLs with schemes (see section:- #3.4.3) are neutralized so that, e.g., 'style="moz-binding:url(http://danger)"' becomes 'style="moz-binding:url(denied:http://danger)"'. Admins, however, may still want to completely deny the 'style' attribute, e.g., with code like $processed = htmLawed($text, array('safe'=>1, 'deny_attribute'=>'style')); + Permitting the 'style' attribute brings in risks of `click-jacking`, etc. CSS property values can render a page non-functional or be used to deface it. Except for URLs, dynamic expressions, and some other things, htmLawed does not completely check 'style' values. It does provide ways for the code-developer implementing htmLawed to do such checks through the '$spec' argument, and through the 'hook_tag' parameter (see section:- #3.4.8 for more). Disallowing style completely and relying on CSS classes and stylesheet files is recommended. + If a value for a parameter auto-set through 'safe' is still manually provided, then that value can over-ride the auto-set value. E.g., with '$config["safe"] = 1' and '$config["elements"] = "*+script"', 'script', but not 'applet', is allowed. 
A page illustrating the efficacy of htmLawed's anti-XSS abilities with 'safe' set to '1' against XSS vectors listed by RSnake:- http://ha.ckers.org/xss.html may be available here:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/rsnake/RSnakeXSSTest.htm. @@ -1288,6 +1321,20 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern `Version number - Release date. Notes` + 1.1.11 - 5 June 2012. Fix for possible problem with handling of multi-byte characters in attribute values in an mbstring.func_overload enviroment. '$config["hook_tag"]', if specified, now receives names of elements in closing tags. + + 1.1.10 - 22 October 2011. Fix for a bug in the 'tidy' functionality that caused the entire input to be replaced with a single space; new parameter, '$config["direct_list_nest"]' to allow direct descendance of a list in a list. (5 April 2012. Dual licensing from LGPLv3 to LGPLv3 and GPLv2+.) + + 1.1.9.5 - 6 July 2011. Minor correction of a rule for nesting of 'li' within 'dir' + + 1.1.9.4 - 3 July 2010. Parameter 'schemes' now accepts '!' so any URL, even a local one, can be `denied`. An issue in which a second URL value in 'style' properties was not checked was fixed. + + 1.1.9.3 - 17 May 2010. Checks for correct nesting of 'param' + + 1.1.9.2 - 26 April 2010. Minor fix regarding rendering of denied URL schemes + + 1.1.9.1 - 26 February 2010. htmLawed now uses the LGPL version 3 license; support for 'flashvars' attribute for 'embed' + 1.1.9 - 22 December 2009. Soft-hyphens are now removed only from URL-accepting attribute values 1.1.8.1 - 16 July 2009. Minor code-change to fix a PHP error notice @@ -1336,6 +1383,10 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern Upgrading is as simple as replacing the previous version of 'htmLawed.php' (assuming it was not modified for customized features). 
As htmLawed output is almost always used in static documents, upgrading should not affect old, finalized content. + *Important* The following upgrades may affect the functionality of a specific htmLawed as indicated by their corresponding notes: + + (1) From version 1.1-1.1.10 to 1.1.11, if a 'hook_tag' function is in use: In version 1.1.11, elements in closing tags (and not just the opening tags) are also passed to the function. There are no attribute names/values to pass, so a 'hook_tag' function receives only the element name. The 'hook_tag' function therefore may have to be edited. See section:- #3.4.9. + Old versions of htmLawed may be available online. E.g., for version 1.0, check http://www.bioinformatics.org/phplabware/downloads/htmLawed1.zip, for 1.1.1, htmLawed111.zip, and for 1.1.10, htmLawed1110.zip. @@ -1382,7 +1433,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern -- 4.10 Acknowledgements ------------------------------------------o - Bryan Blakey, Ulf Harnhammer, Gareth Heyes, Lukasz Pilorz, Shelley Powers, Edward Yang, and many anonymous users. + Nicholas Alipaz, Bryan Blakey, Pádraic Brady, Ulf Harnhammer, Gareth Heyes, Klaus Leithoff, Lukasz Pilorz, Shelley Powers, Edward Yang, and many anonymous users. Thank you! 
@@ -1446,6 +1497,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern disabled - button, input, optgroup, option, select, textarea enctype - form face - font + flashvars* - embed for - label frame - table frameborder - iframe diff --git a/mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt b/mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt old mode 100644 new mode 100755 index ea24b1839..793a5a6a7 --- a/mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt +++ b/mod/htmlawed/vendors/htmLawed/htmLawed_TESTCASE.txt @@ -1,8 +1,8 @@ /* -htmLawed_TESTCASE.txt, 22 December 2009 -htmLawed 1.1.9, 22 December 2009 +htmLawed_TESTCASE.txt, 22 October 2011 +htmLawed 1.1.11, 5 June 2012 Copyright Santosh Patnaik -GPL v3 license +Dual licensed with LGPL 3 and GPL 2 or later A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed */ @@ -94,6 +94,15 @@ The PHP software script used for this web-page webpage i +value + + + + + + + +
Complex-4: nested and other tables
Cell
Cell
Cell
Cell Cell Cell
Cell
Cell Cell Cell

@@ -181,10 +190,13 @@ text none t e x t
HTML comments (also CDATA)
-Special characters inside: , , , c
-Normal: , , comment:,
text not allowed

-Malformed: , < ![CDATA check ]]>, < ![CDATA check ] ]>
-Invalid: >comment in tag content, +Script inside:
+Special characters inside: , , , c
+Normal: , , comment:,
text not allowed

+Malformed: , < ![CDATA check ]]>, < ![CDATA check ] ]>
+Invalid:
>comment in tag content,
Ins-Del
@@ -224,6 +236,11 @@ Invalid: >comment in tag content,
  • l3
  • l4
    1. lo3
    2. lo4
      1. lo5

  • +Nested, directly:
      +
    • l1
    • +
        l2
      +
    • l3
    • +

    Nested, close-tags omitted:
    • l1
    • l2
      1. lo1
      2. lo2
      @@ -242,6 +259,13 @@ Invalid: >comment in tag content,
    +
    Microdata
    + +
    +I am X but people call me Y. +Find me at +
    +
    Non-English text-1
    Inscrieţi-vă acum la a Zecea Conferinţă Internaţională
    @@ -320,7 +344,8 @@ na Alemanha. Relative and absolute: , , , , , ,
    (try base URL value of 'http://a.com/b/')
    CSS URLs:
    ,
    ,
    ,
    ,

    -Anti-spam: (try regex for 'http://a.com', etc.) , , , , , ,
    +Double URLs: b
    +Anti-spam: (try regex for 'http://a.com', etc.) , , , , , , ,
    XSS
    -- cgit v1.2.3