From e8b29ab788547534deb8a0a946b5f77700a9f7a9 Mon Sep 17 00:00:00 2001 From: brettp Date: Wed, 1 Jul 2009 14:55:58 +0000 Subject: Refs #1086: Added htmLawed plugin as replacement for kses. git-svn-id: https://code.elgg.org/elgg/trunk@3375 36083f99-b078-4883-b0ff-0f9b5a30f544 --- mod/htmlawed/vendors/htmLawed/htmLawedTest.php | 591 +++++++++++++++++++++++++ 1 file changed, 591 insertions(+) create mode 100644 mod/htmlawed/vendors/htmLawed/htmLawedTest.php (limited to 'mod/htmlawed/vendors/htmLawed/htmLawedTest.php') diff --git a/mod/htmlawed/vendors/htmLawed/htmLawedTest.php b/mod/htmlawed/vendors/htmLawed/htmLawedTest.php new file mode 100644 index 000000000..c2caaff50 --- /dev/null +++ b/mod/htmlawed/vendors/htmLawed/htmLawedTest.php @@ -0,0 +1,591 @@ + $v){ + $_POST[$k] = stripslashes($v); + } + ini_set('magic_quotes_gpc', 0); +} +set_magic_quotes_runtime(0); + +$_POST['enc'] = (isset($_POST['enc']) and preg_match('`^[-\w]+$`', $_POST['enc'])) ? $_POST['enc'] : 'utf-8'; + +// token for anti-CSRF +if(count($_POST)){ + if((empty($_GET['pre']) and ((!empty($_POST['token']) and !empty($_SESSION['token']) and $_POST['token'] != $_SESSION['token']) or empty($_POST[$_sid]) or $_POST[$_sid] != session_id() or empty($_COOKIE[$_sid]) or $_COOKIE[$_sid] != session_id())) or ($_POST[$_sid] != session_id())){ + $_POST = array('enc'=>'utf-8'); + } +} +if(empty($_GET['pre'])){ + $_SESSION['token'] = md5(uniqid(rand(), 1)); + $token = $_SESSION['token']; + session_regenerate_id(1); +} + +// compress +if(function_exists('gzencode') && isset($_SERVER['HTTP_ACCEPT_ENCODING']) && preg_match('`gzip|deflate`i', $_SERVER['HTTP_ACCEPT_ENCODING']) && !ini_get('zlib.output_compression')){ + ob_start('ob_gzhandler'); +} + +// HTM for unprocessed +if(isset($_POST['inputH'])){ + echo 'htmLawed test: HTML view of unprocessed input

  Rendering of unprocessed input without an HTML doctype or charset declaration     close window | htmLawed test page

', $_POST['inputH'], '
'; + exit; +} + +// main +$_POST['text'] = isset($_POST['text']) ? $_POST['text'] : 'text to process; < '. $_limit. ' characters'. ($_hlimit ? ' (for binary hexdump view, < '. $_hlimit. ')' : ''); +$do = (!empty($_POST[$_sid]) && isset($_POST['text'][0]) && !isset($_POST['text'][$_limit])) ? 1 : 0; +$limit_exceeded = isset($_POST['text'][$_limit]) ? 1 : 0; +$pre_mem = memory_get_usage(); +$validation = (!empty($_POST[$_sid]) and isset($_POST['w3c_validate'][0])) ? 1 : 0; +include './htmLawed.php'; + +function format($t){ + $t = "\n". str_replace(array("\t", "\r\n", "\r", '&', '<', '>', "\n"), array(' ', "\n", "\n", '&', '<', '>', "¬
\n"), $t); + return str_replace(array('
', "\n ", ' '), array("\n
\n", "\n ", '  '), $t); +} + +function hexdump($d){ +// Mainly by Aidan Lister , Peter Waller + $hexi = ''; + $ascii = ''; + ob_start(); + echo '
';
+ $offset = 0; // byte offset printed at the start of each output row
+ $len = strlen($d);
+ for($i=$j=0; $i<$len; $i++) // $i indexes the bytes of $d; $j counts bytes buffered for the current row
+ {
+  // Hex column: append this byte as two uppercase hex digits followed by a space
+  $hexi .= sprintf("%02X ", ord($d[$i]));
+  // Text column: bytes below 0x20 become '.'; every other byte is HTML-escaped individually
+  if(ord($d[$i]) >= 32){
+   $ascii .= htmlspecialchars($d[$i]); // NOTE(review): a lone byte >= 0x80 may escape to an empty string under a UTF-8 default charset - confirm
+  }else{
+   $ascii .= '.';
+  } 
+  // After the 8th byte of a row ($j is not yet incremented here) widen the gap in both columns
+  if($j == 7){
+   $hexi .= ' ';
+   $ascii .= '  ';
+  }
+  // Flush the row once 16 bytes are buffered or the last input byte has been consumed
+  if(++$j == 16 || $i == $len-1){
+   // Print the offset as 4 hex digits, the hex column left-justified to 49 characters, then the text column
+   echo sprintf("%04X   %-49s   %s", $offset, $hexi, $ascii);   
+   // Clear both row buffers and move on to the next 16-byte offset
+   $hexi = $ascii = '';
+   $offset += 16;
+   $j = 0;  
+   // Rows are separated by a newline; none is written after the final row
+   if ($i !== $len-1){
+    echo "\n";
+   }
+  }
+ }
+ echo '
'; + $o = ob_get_contents(); + ob_end_clean(); + return $o; +} +?> + + + + + + + + +htmLawed (<?php echo hl_version();?>) test + + +
+ +
HTMLAWED TEST
+htm / txt documentation
+ +Input » (max. chars) + +
+ +
+ + +
+ + +'; + } +?> + + + + + + + + + + Validator tools: '; + } +} +?> + +Encoding: + +
+
+ +Input text is too long!
'; +} +?> + +
+ +Settings » + + +
+ +$v){ + if($k[0] == 'h' && $v != 'nil'){ + $cfg[substr($k, 1)] = $v; + } + } + + if($cfg['anti_link_spam'] && (!empty($cfg['anti_link_spam11']) or !empty($cfg['anti_link_spam12']))){ + $cfg['anti_link_spam'] = array($cfg['anti_link_spam11'], $cfg['anti_link_spam12']); + } + unset($cfg['anti_link_spam11'], $cfg['anti_link_spam12']); + if($cfg['anti_mail_spam'] == 1){ + $cfg['anti_mail_spam'] = isset($cfg['anti_mail_spam1'][0]) ? $cfg['anti_mail_spam1'] : 0; + } + unset($cfg['anti_mail_spam11']); + if($cfg['deny_attribute'] == 1){ + $cfg['deny_attribute'] = isset($cfg['deny_attribute1'][0]) ? $cfg['deny_attribute1'] : 0; + } + unset($cfg['deny_attribute1']); + if($cfg['tidy'] == 2){ + $cfg['tidy'] = isset($cfg['tidy2'][0]) ? $cfg['tidy2'] : 0; + } + unset($cfg['tidy2']); + if($cfg['unique_ids'] == 2){ + $cfg['unique_ids'] = isset($cfg['unique_ids2'][0]) ? $cfg['unique_ids2'] : 1; + } + unset($cfg['unique_ids2']); + unset($cfg['and_mark']); // disabling and_mark + + $cfg['show_setting'] = 'hlcfg'; + $st = microtime(); + $out = htmLawed($_POST['text'], $cfg, str_replace(array('$', '{'), '', $_POST['spec'])); + $et = microtime(); + echo '
Input code » ', strlen($_POST['text']), ' chars, ~', round((substr_count($_POST['text'], '>') + substr_count($_POST['text'], '<'))/2), ' tags ', (!isset($_POST['text'][$_hlimit]) ? ' Input binary » ' : ''), ' Finalized settings »  ', '
Output » htmLawed processing time ', number_format(((substr($et,0,9)) + (substr($et,-10)) - (substr($st,0,9)) - (substr($st,-10))),4), ' s', (($mem = memory_get_peak_usage()) !== false ? ', peak memory usage '. round(($mem-$pre_mem)/1048576, 2). ' MB' : ''), '
'; + if($_w3c_validate && $validation) + { +?> + + + + +
Output code »
', format($out), '
', (!isset($_POST['text'][$_hlimit]) ? '
Output binary »' : ''), '
Output rendered »
', $out, '
'; +} +else{ +?> + +
+ +
Use with a Javascript- and cookie-enabled, relatively new version of a common browser. + +
You can use text from this collection of test-cases in the input. Set the character encoding of the browser to Unicode/utf-8 before copying.' : ''); ?> + +

For more about the anti-XSS capability of htmLawed, see this page. +

Submitted input will also be HTML-rendered (XHTML 1) after htmLawed-filtering. +

Change Encoding to reflect the character encoding of the input text. Even then, it may not work or some characters may not display properly because of variable browser support and because of the form interface. Developers can write some PHP code to capture the filtered input to a file if this is important. +

Refer to the htmLawed documentation (htm/txt) for details about Settings, and htmLawed's behavior and limitations. +

For Settings, incorrectly-specified values like regular expressions are silently ignored. One or more settings form-fields may have been disabled. Some characters are not allowed in the Spec field. +

Hovering the mouse over some of the text can provide additional information in some browsers. + + + +

Because of character-encoding issues, the W3C validator (anyway not perfect) may reject validation requests or invalidate otherwise-valid code, esp. if text was copy-pasted in the input box. Local applications like the HTML Validator Firefox browser add-on may be useful in such cases. + + + +
+ + + +
+ + \ No newline at end of file -- cgit v1.2.3