From 3bf72994688ad9292bf37444d80ab5ab1a002748 Mon Sep 17 00:00:00 2001 From: Paweł Sroka Date: Sun, 4 Nov 2012 08:25:28 +0100 Subject: Fixes #1479 - Replaces WP autop with implementation from Steve Clay. --- engine/lib/output.php | 83 ++++++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 40 deletions(-) (limited to 'engine/lib/output.php') diff --git a/engine/lib/output.php b/engine/lib/output.php index 0069360f0..d50576b44 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -43,51 +43,25 @@ function parse_urls($text) { /** * Create paragraphs from text with line spacing - * Borrowed from Wordpress. * * @param string $pee The string - * @param bool $br Add BRs? + * @deprecated Use elgg_autop instead * - * @todo Rewrite * @return string **/ -function autop($pee, $br = 1) { - $pee = $pee . "\n"; // just to make things a little easier, pad the end - $pee = preg_replace('|
\s*
|', "\n\n", $pee); - // Space things out a little - $allblocks = '(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|map|area|blockquote|address|math|style|input|p|h[1-6]|hr)'; - $pee = preg_replace('!(<' . $allblocks . '[^>]*>)!', "\n$1", $pee); - $pee = preg_replace('!()!', "$1\n\n", $pee); - $pee = str_replace(array("\r\n", "\r"), "\n", $pee); // cross-platform newlines - if (strpos($pee, ']*)>\s*|', "", $pee); // no pee inside object/embed - $pee = preg_replace('|\s*\s*|', '', $pee); - } - $pee = preg_replace("/\n\n+/", "\n\n", $pee); // take care of duplicates - $pee = preg_replace('/\n?(.+?)(?:\n\s*\n|\z)/s', "

\n", $pee); // make paragraphs, including one at the end - $pee = preg_replace('|

\s*?

|', '', $pee); // under certain strange conditions it could create a P of entirely whitespace - $pee = preg_replace('!

([^<]+)\s*?(]*>)!', "

$2", $pee); - $pee = preg_replace('|

|', "$1

", $pee); - $pee = preg_replace('!

\s*(]*>)\s*

!', "$1", $pee); // don't pee all over a tag - $pee = preg_replace("|

(|", "$1", $pee); // problem with nested lists - $pee = preg_replace('|

]*)>|i', "

", $pee); - $pee = str_replace('

', '

', $pee); - $pee = preg_replace('!

\s*(]*>)!', "$1", $pee); - $pee = preg_replace('!(]*>)\s*

!', "$1", $pee); - if ($br) { - $pee = preg_replace_callback('/<(script|style).*?<\/\\1>/s', create_function('$matches', 'return str_replace("\n", "", $matches[0]);'), $pee); - $pee = preg_replace('|(?)\s*\n|', "
\n", $pee); // optionally make line breaks - $pee = str_replace('', "\n", $pee); - } - $pee = preg_replace('!(]*>)\s*
!', "$1", $pee); - $pee = preg_replace('!
(\s*]*>)!', '$1', $pee); - //if (strpos($pee, '. Only there because of the comment. - // $pee = preg_replace_callback('!()(.*?)!is', 'clean_pre', $pee ); - //} - $pee = preg_replace("|\n

$|", '

', $pee); - - return $pee; +function autop($pee) { + return elgg_autop($pee); +} + +/** + * Create paragraphs from text with line spacing + * + * @param string $pee The string + * + * @return string + **/ +function elgg_autop($pee) { + return ElggAutop::getInstance()->process($pee); } /** @@ -398,3 +372,32 @@ function elgg_strip_tags($string) { return $string; } + +/** + * Unit tests for Output + * + * @param sting $hook unit_test + * @param string $type system + * @param mixed $value Array of tests + * @param mixed $params Params + * + * @return array + * @access private + */ +function output_unit_test($hook, $type, $value, $params) { + global $CONFIG; + $value[] = $CONFIG->path . 'engine/tests/api/output.php'; + return $value; +} + +/** + * Initialise the Output subsystem. + * + * @return void + * @access private + */ +function output_init() { + elgg_register_plugin_hook_handler('unit_test', 'system', 'output_unit_test'); +} + +elgg_register_event_handler('init', 'system', 'output_init'); -- cgit v1.2.3 From 43a395ae735777bfb5474c4f6a37dc1cd0818a37 Mon Sep 17 00:00:00 2001 From: Brett Profitt Date: Mon, 10 Dec 2012 15:50:25 -0500 Subject: Fixes #1479. Added ElggAutoP. Removing [\n\r] from test strings before compare to deal with differing whitespace between tags among PHP versions. --- engine/classes/ElggAutoP.php | 309 ++++++++++++++++++++++++++++++++++++++++++ engine/classes/ElggAutop.php | 315 ------------------------------------------- engine/lib/output.php | 11 +- engine/tests/api/output.php | 138 ++++++++++--------- 4 files changed, 389 insertions(+), 384 deletions(-) create mode 100644 engine/classes/ElggAutoP.php delete mode 100644 engine/classes/ElggAutop.php (limited to 'engine/lib/output.php') diff --git a/engine/classes/ElggAutoP.php b/engine/classes/ElggAutoP.php new file mode 100644 index 000000000..89d77e583 --- /dev/null +++ b/engine/classes/ElggAutoP.php @@ -0,0 +1,309 @@ +_blocks = preg_split('@\\s+@', $this->_blocks); + $this->_descendList = preg_split('@\\s+@', $this->_descendList); + $this->_alterList = preg_split('@\\s+@', $this->_alterList); + $this->_inlines = preg_split('@\\s+@', $this->_inlines); + $this->_unique = md5(__FILE__); + } + + /** + * Intance of class for singleton pattern. + * @var ElggAutoP + */ + private static $instance; + + /** + * Singleton pattern. + * @return ElggAutoP + */ + public static function getInstance() { + $className = __CLASS__; + if (!(self::$instance instanceof $className)) { + self::$instance = new $className(); + } + return self::$instance; + } + + /** + * Create wrapper P and BR elements in HTML depending on newlines. Useful when + * users use newlines to signal line and paragraph breaks. In all cases output + * should be well-formed markup. + * + * In DIV, LI, TD, and TH elements, Ps are only added when their would be at + * least two of them. + * + * @param string $html snippet + * @return string|false output or false if parse error occurred + */ + public function process($html) { + // normalize whitespace + $html = str_replace(array("\r\n", "\r"), "\n", $html); + + // allows preserving entities untouched + $html = str_replace('&', $this->_unique . 'AMP', $html); + + $this->_doc = new DOMDocument(); + + // parse to DOM, suppressing loadHTML warnings + // http://www.php.net/manual/en/domdocument.loadhtml.php#95463 + libxml_use_internal_errors(true); + + if (!$this->_doc->loadHTML("{$html}" + . "")) { + return false; + } + + $this->_xpath = new DOMXPath($this->_doc); + // start processing recursively at the BODY element + $nodeList = $this->_xpath->query('//body[1]'); + $this->_addParagraphs($nodeList->item(0)); + + // serialize back to HTML + $html = $this->_doc->saveHTML(); + + // split AUTOPs into multiples at /\n\n+/ + $html = preg_replace('/(' . $this->_unique . 'NL){2,}/', '', $html); + $html = str_replace(array($this->_unique . 'BR', $this->_unique . 'NL', '
'), + '
', + $html); + $html = str_replace('
', '', $html); + + // re-parse so we can handle new AUTOP elements + + if (!$this->_doc->loadHTML($html)) { + return false; + } + // must re-create XPath object after DOM load + $this->_xpath = new DOMXPath($this->_doc); + + // strip AUTOPs that only have comments/whitespace + foreach ($this->_xpath->query('//autop') as $autop) { + $hasContent = false; + if (trim($autop->textContent) !== '') { + $hasContent = true; + } else { + foreach ($autop->childNodes as $node) { + if ($node->nodeType === XML_ELEMENT_NODE) { + $hasContent = true; + break; + } + } + } + if (!$hasContent) { + // strip w/ preg_replace later (faster than moving nodes out) + $autop->setAttribute("r", "1"); + } + } + + // remove a single AUTOP inside certain elements + foreach ($this->_xpath->query('//div') as $el) { + $autops = $this->_xpath->query('./autop', $el); + if ($autops->length === 1) { + // strip w/ preg_replace later (faster than moving nodes out) + $autops->item(0)->setAttribute("r", "1"); + } + } + + $html = $this->_doc->saveHTML(); + + // trim to the contents of BODY + $bodyStart = strpos($html, ''); + $bodyEnd = strpos($html, '', $bodyStart + 6); + $html = substr($html, $bodyStart + 6, $bodyEnd - $bodyStart - 6); + + // strip AUTOPs that should be removed + $html = preg_replace('@(.*?)@', '\\1', $html); + + // commit to converting AUTOPs to Ps + $html = str_replace('', "\n

", $html); + $html = str_replace('', "

\n", $html); + + $html = str_replace('
', '
', $html); + $html = str_replace($this->_unique . 'AMP', '&', $html); + return $html; + } + + /** + * Add P and BR elements as necessary + * + * @param DOMElement $el + */ + protected function _addParagraphs(DOMElement $el) { + // no need to recurse, just queue up + $elsToProcess = array($el); + $inlinesToProcess = array(); + while ($el = array_shift($elsToProcess)) { + // if true, we can alter all child nodes, if not, we'll just call + // _addParagraphs on each element in the descendInto list + $alterInline = in_array($el->nodeName, $this->_alterList); + + // inside affected elements, we want to trim leading whitespace from + // the first text node + $ltrimFirstTextNode = true; + + // should we open a new AUTOP element to move inline elements into? + $openP = true; + $autop = null; + + // after BR, ignore a newline + $isFollowingBr = false; + + $node = $el->firstChild; + while (null !== $node) { + if ($alterInline) { + if ($openP) { + $openP = false; + // create a P to move inline content into (this may be removed later) + $autop = $el->insertBefore($this->_doc->createElement('autop'), $node); + } + } + + $isElement = ($node->nodeType === XML_ELEMENT_NODE); + if ($isElement) { + $elName = $node->nodeName; + } + $isBlock = ($isElement && in_array($elName, $this->_blocks)); + + if ($alterInline) { + $isInline = $isElement && ! $isBlock; + $isText = ($node->nodeType === XML_TEXT_NODE); + $isLastInline = (! $node->nextSibling + || ($node->nextSibling->nodeType === XML_ELEMENT_NODE + && in_array($node->nextSibling->nodeName, $this->_blocks))); + if ($isElement) { + $isFollowingBr = ($node->nodeName === 'br'); + } + + if ($isText) { + $nodeText = $node->nodeValue; + if ($ltrimFirstTextNode) { + $nodeText = ltrim($nodeText); + $ltrimFirstTextNode = false; + } + if ($isFollowingBr && preg_match('@^[ \\t]*\\n[ \\t]*@', $nodeText, $m)) { + // if a user ends a line with
, don't add a second BR + $nodeText = substr($nodeText, strlen($m[0])); + } + if ($isLastInline) { + $nodeText = rtrim($nodeText); + } + $nodeText = str_replace("\n", $this->_unique . 'NL', $nodeText); + $tmpNode = $node; + $node = $node->nextSibling; // move loop to next node + + // alter node in place, then move into AUTOP + $tmpNode->nodeValue = $nodeText; + $autop->appendChild($tmpNode); + + continue; + } + } + if ($isBlock || ! $node->nextSibling) { + if ($isBlock) { + if (in_array($node->nodeName, $this->_descendList)) { + $elsToProcess[] = $node; + //$this->_addParagraphs($node); + } + } + $openP = true; + $ltrimFirstTextNode = true; + } + if ($alterInline) { + if (! $isBlock) { + $tmpNode = $node; + if ($isElement && false !== strpos($tmpNode->textContent, "\n")) { + $inlinesToProcess[] = $tmpNode; + } + $node = $node->nextSibling; + $autop->appendChild($tmpNode); + continue; + } + } + + $node = $node->nextSibling; + } + } + + // handle inline nodes + // no need to recurse, just queue up + while ($el = array_shift($inlinesToProcess)) { + $ignoreLeadingNewline = false; + foreach ($el->childNodes as $node) { + if ($node->nodeType === XML_ELEMENT_NODE) { + if ($node->nodeValue === 'BR') { + $ignoreLeadingNewline = true; + } else { + $ignoreLeadingNewline = false; + if (false !== strpos($node->textContent, "\n")) { + $inlinesToProcess[] = $node; + } + } + continue; + } elseif ($node->nodeType === XML_TEXT_NODE) { + $text = $node->nodeValue; + if ($text[0] === "\n" && $ignoreLeadingNewline) { + $text = substr($text, 1); + $ignoreLeadingNewline = false; + } + $node->nodeValue = str_replace("\n", $this->_unique . 'BR', $text); + } + } + } + } +} diff --git a/engine/classes/ElggAutop.php b/engine/classes/ElggAutop.php deleted file mode 100644 index fa0c34225..000000000 --- a/engine/classes/ElggAutop.php +++ /dev/null @@ -1,315 +0,0 @@ - - * @license http://www.opensource.org/licenses/mit-license.php MIT License - */ -class ElggAutop { - - public $encoding = 'UTF-8'; - - /** - * @var DOMDocument - */ - protected $_doc = null; - - /** - * @var DOMXPath - */ - protected $_xpath = null; - - protected $_blocks = 'address article area aside blockquote caption col colgroup dd - details div dl dt fieldset figure figcaption footer form h1 h2 h3 h4 h5 h6 header - hr hgroup legend map math menu nav noscript p pre section select style summary - table tbody td tfoot th thead tr ul ol option li'; - - /** - * @var array - */ - protected $_inlines = 'a abbr audio b button canvas caption cite code command datalist - del dfn em embed i iframe img input ins kbd keygen label map mark meter object - output progress q rp rt ruby s samp script select small source span strong style - sub sup textarea time var video wbr'; - - /** - * Descend into these elements to add Ps - * - * @var array - */ - protected $_descendList = 'article aside blockquote body details div footer form - header section'; - - /** - * Add Ps inside these elements - * - * @var array - */ - protected $_alterList = 'article aside blockquote body details div footer header - section'; - - protected $_unique = ''; - - public function __construct() - { - $this->_blocks = preg_split('@\\s+@', $this->_blocks); - $this->_descendList = preg_split('@\\s+@', $this->_descendList); - $this->_alterList = preg_split('@\\s+@', $this->_alterList); - $this->_inlines = preg_split('@\\s+@', $this->_inlines); - $this->_unique = md5(__FILE__); - } - - /** - * Intance of class for singleton pattern. - * @var ElggAutop - */ - private static $instance; - - /** - * Singleton pattern. - * @return ElggAutop - */ - public static function getInstance() { - $className = __CLASS__; - if (!(self::$instance instanceof $className)) { - self::$instance = new $className(); - } - return self::$instance; - } - - /** - * Create wrapper P and BR elements in HTML depending on newlines. Useful when - * users use newlines to signal line and paragraph breaks. In all cases output - * should be well-formed markup. - * - * In DIV, LI, TD, and TH elements, Ps are only added when their would be at - * least two of them. - * - * @param string $html snippet - * @return string|false output or false if parse error occurred - */ - public function process($html) - { - // normalize whitespace - $html = str_replace(array("\r\n", "\r"), "\n", $html); - - // allows preserving entities untouched - $html = str_replace('&', $this->_unique . 'AMP', $html); - - $this->_doc = new DOMDocument(); - - // parse to DOM, suppressing loadHTML warnings - // http://www.php.net/manual/en/domdocument.loadhtml.php#95463 - libxml_use_internal_errors(true); - if (! @$this->_doc->loadHTML("{$html}" - . "")) { - return false; - } - - $this->_xpath = new DOMXPath($this->_doc); - // start processing recursively at the BODY element - $nodeList = $this->_xpath->query('//body[1]'); - $this->_addParagraphs($nodeList->item(0)); - - // serialize back to HTML - $html = $this->_doc->saveHTML(); - - // split AUTOPs into multiples at /\n\n+/ - $html = preg_replace('/(' . $this->_unique . 'NL){2,}/', '', $html); - $html = str_replace(array($this->_unique . 'BR', $this->_unique . 'NL', '
'), - '
', - $html); - $html = str_replace('
', '', $html); - - // re-parse so we can handle new AUTOP elements - - if (! @$this->_doc->loadHTML($html)) { - return false; - } - // must re-create XPath object after DOM load - $this->_xpath = new DOMXPath($this->_doc); - - // strip AUTOPs that only have comments/whitespace - foreach ($this->_xpath->query('//autop') as $autop) { - $hasContent = false; - if (trim($autop->textContent) !== '') { - $hasContent = true; - } else { - foreach ($autop->childNodes as $node) { - if ($node->nodeType === XML_ELEMENT_NODE) { - $hasContent = true; - break; - } - } - } - if (! $hasContent) { - // strip w/ preg_replace later (faster than moving nodes out) - $autop->setAttribute("r", "1"); - } - } - - // remove a single AUTOP inside certain elements - - foreach ($this->_xpath->query('//div') as $el) { - $autops = $this->_xpath->query('./autop', $el); - if ($autops->length === 1) { - // strip w/ preg_replace later (faster than moving nodes out) - $autops->item(0)->setAttribute("r", "1"); - } - } - - $html = $this->_doc->saveHTML(); - - // trim to the contents of BODY - $bodyStart = strpos($html, ''); - $bodyEnd = strpos($html, '', $bodyStart + 6); - $html = substr($html, $bodyStart + 6, $bodyEnd - $bodyStart - 6); - - // strip AUTOPs that should be removed - $html = preg_replace('@(.*?)@', '\\1', $html); - - // commit to converting AUTOPs to Ps - $html = str_replace('', "\n

", $html); - $html = str_replace('', "

\n", $html); - - $html = str_replace('
', '
', $html); - $html = str_replace($this->_unique . 'AMP', '&', $html); - return $html; - } - - /** - * Add P and BR elements as necessary - * - * @param DOMElement $el - */ - protected function _addParagraphs(DOMElement $el) - { - // no need to recurse, just queue up - $elsToProcess = array($el); - $inlinesToProcess = array(); - while ($el = array_shift($elsToProcess)) { - // if true, we can alter all child nodes, if not, we'll just call - // _addParagraphs on each element in the descendInto list - $alterInline = in_array($el->nodeName, $this->_alterList); - - // inside affected elements, we want to trim leading whitespace from - // the first text node - $ltrimFirstTextNode = true; - - // should we open a new AUTOP element to move inline elements into? - $openP = true; - $autop = null; - - // after BR, ignore a newline - $isFollowingBr = false; - - $node = $el->firstChild; - while (null !== $node) { - if ($alterInline) { - if ($openP) { - $openP = false; - // create a P to move inline content into (this may be removed later) - $autop = $el->insertBefore($this->_doc->createElement('autop'), $node); - } - } - - $isElement = ($node->nodeType === XML_ELEMENT_NODE); - if ($isElement) { - $elName = $node->nodeName; - } - $isBlock = ($isElement && in_array($elName, $this->_blocks)); - - if ($alterInline) { - $isInline = $isElement && ! $isBlock; - $isText = ($node->nodeType === XML_TEXT_NODE); - $isLastInline = (! $node->nextSibling - || ($node->nextSibling->nodeType === XML_ELEMENT_NODE - && in_array($node->nextSibling->nodeName, $this->_blocks))); - if ($isElement) { - $isFollowingBr = ($node->nodeName === 'br'); - } - - if ($isText) { - $nodeText = $node->nodeValue; - if ($ltrimFirstTextNode) { - $nodeText = ltrim($nodeText); - $ltrimFirstTextNode = false; - } - if ($isFollowingBr && preg_match('@^[ \\t]*\\n[ \\t]*@', $nodeText, $m)) { - // if a user ends a line with
, don't add a second BR - $nodeText = substr($nodeText, strlen($m[0])); - } - if ($isLastInline) { - $nodeText = rtrim($nodeText); - } - $nodeText = str_replace("\n", $this->_unique . 'NL', $nodeText); - $tmpNode = $node; - $node = $node->nextSibling; // move loop to next node - - // alter node in place, then move into AUTOP - $tmpNode->nodeValue = $nodeText; - $autop->appendChild($tmpNode); - - continue; - } - } - if ($isBlock || ! $node->nextSibling) { - if ($isBlock) { - if (in_array($node->nodeName, $this->_descendList)) { - $elsToProcess[] = $node; - //$this->_addParagraphs($node); - } - } - $openP = true; - $ltrimFirstTextNode = true; - } - if ($alterInline) { - if (! $isBlock) { - $tmpNode = $node; - if ($isElement && false !== strpos($tmpNode->textContent, "\n")) { - $inlinesToProcess[] = $tmpNode; - } - $node = $node->nextSibling; - $autop->appendChild($tmpNode); - continue; - } - } - - $node = $node->nextSibling; - } - } - - // handle inline nodes - // no need to recurse, just queue up - while ($el = array_shift($inlinesToProcess)) { - $ignoreLeadingNewline = false; - foreach ($el->childNodes as $node) { - if ($node->nodeType === XML_ELEMENT_NODE) { - if ($node->nodeValue === 'BR') { - $ignoreLeadingNewline = true; - } else { - $ignoreLeadingNewline = false; - if (false !== strpos($node->textContent, "\n")) { - $inlinesToProcess[] = $node; - } - } - continue; - } elseif ($node->nodeType === XML_TEXT_NODE) { - $text = $node->nodeValue; - if ($text[0] === "\n" && $ignoreLeadingNewline) { - $text = substr($text, 1); - $ignoreLeadingNewline = false; - } - $node->nodeValue = str_replace("\n", $this->_unique . 'BR', $text); - } - } - } - } -} diff --git a/engine/lib/output.php b/engine/lib/output.php index cce1c7cba..bff0bf6e9 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -16,7 +16,7 @@ **/ function parse_urls($text) { // @todo this causes problems with - // must be ing format (no space). + // must be in format (no space). // By default htmlawed rewrites tags to this format. // if PHP supported conditional negative lookbehinds we could use this: // $r = preg_replace_callback('/(?"\'\!,]+)/i', @@ -46,6 +46,7 @@ function parse_urls($text) { * * @param string $pee The string * @deprecated Use elgg_autop instead + * @todo Add deprecation warning in 1.9 * * @return string **/ @@ -56,12 +57,12 @@ function autop($pee) { /** * Create paragraphs from text with line spacing * - * @param string $pee The string + * @param string $string The string * * @return string **/ -function elgg_autop($pee) { - return ElggAutop::getInstance()->process($pee); +function elgg_autop($string) { + return ElggAutoP::getInstance()->process($string); } /** @@ -358,7 +359,7 @@ function elgg_get_friendly_time($time) { /** * Strip tags and offer plugins the chance. * Plugins register for output:strip_tags plugin hook. - * Original string included in $params['original_string'] + * Original string included in $params['original_string'] * * @param string $string Formatted string * diff --git a/engine/tests/api/output.php b/engine/tests/api/output.php index eb1a66b29..c3d5aa8c6 100644 --- a/engine/tests/api/output.php +++ b/engine/tests/api/output.php @@ -1,64 +1,74 @@ - - */ -class ElggCoreOutputAutoPTest extends ElggCoreUnitTest { - - /** - * @var ElggAutop - */ - protected $_autop; - - public function setUp() { - $this->_autop = new ElggAutop(); - } - - public function testDomRoundtrip() - { - $d = dir(dirname(__DIR__) . '/test_files/output/autop'); - $in = file_get_contents($d->path . "/domdoc_in.html"); - $exp = file_get_contents($d->path . "/domdoc_exp.html"); - - $doc = new DOMDocument(); - libxml_use_internal_errors(true); - $doc->loadHTML("" - . $in . ''); - $serialized = $doc->saveHTML(); - list(,$out) = explode('', $serialized, 2); - list($out) = explode('', $out, 2); - - $this->assertEqual($exp, $out, "DOMDocument's parsing/serialization roundtrip"); - } - - public function testProcess() - { - $data = $this->provider(); - foreach ($data as $row) { - list($test, $in, $exp) = $row; - $out = $this->_autop->process($in); - $this->assertEqual($exp, $out, "Equality case {$test}"); - } - } - - public function provider() - { - $d = dir(dirname(__DIR__) . '/test_files/output/autop'); - $tests = array(); - while (false !== ($entry = $d->read())) { - if (preg_match('/^([a-z\\-]+)\.in\.html$/i', $entry, $m)) { - $tests[] = $m[1]; - } - } - - $data = array(); - foreach ($tests as $test) { - $data[] = array( - $test, - file_get_contents($d->path . '/' . "{$test}.in.html"), - file_get_contents($d->path . '/' . "{$test}.exp.html"), - ); - } - return $data; - } -} +_autop = new ElggAutoP(); + } + + public function testDomRoundtrip() { + $d = dir(dirname(dirname(__FILE__)) . '/test_files/output/autop'); + $in = file_get_contents($d->path . "/domdoc_in.html"); + $exp = file_get_contents($d->path . "/domdoc_exp.html"); + $exp = $this->flattenString($exp); + + $doc = new DOMDocument(); + libxml_use_internal_errors(true); + $doc->loadHTML("" + . $in . ''); + $serialized = $doc->saveHTML(); + list(,$out) = explode('', $serialized, 2); + list($out) = explode('', $out, 2); + $out = $this->flattenString($out); + + $this->assertEqual($exp, $out, "DOMDocument's parsing/serialization roundtrip"); + } + + public function testProcess() { + $data = $this->provider(); + foreach ($data as $row) { + list($test, $in, $exp) = $row; + $exp = $this->flattenString($exp); + $out = $this->_autop->process($in); + $out = $this->flattenString($out); + + $this->assertEqual($exp, $out, "Equality case {$test}"); + } + } + + public function provider() { + $d = dir(dirname(dirname(__FILE__)) . '/test_files/output/autop'); + $tests = array(); + while (false !== ($entry = $d->read())) { + if (preg_match('/^([a-z\\-]+)\.in\.html$/i', $entry, $m)) { + $tests[] = $m[1]; + } + } + + $data = array(); + foreach ($tests as $test) { + $data[] = array( + $test, + file_get_contents($d->path . '/' . "{$test}.in.html"), + file_get_contents($d->path . '/' . "{$test}.exp.html"), + ); + } + return $data; + } + + /** + * Different versions of PHP return different whitespace between tags. + * Removing all line breaks normalizes that. + */ + public function flattenString($string) { + $r = preg_replace('/[\n\r]+/', '', $string); + return $r; + } +} \ No newline at end of file -- cgit v1.2.3 From c54fd6c6de4442f308f999caf36ae075de42a69c Mon Sep 17 00:00:00 2001 From: cash Date: Fri, 25 Jan 2013 18:16:19 -0500 Subject: Fixes #4972 decode special chars before encoding for friendly titles --- engine/lib/output.php | 2 ++ 1 file changed, 2 insertions(+) (limited to 'engine/lib/output.php') diff --git a/engine/lib/output.php b/engine/lib/output.php index bff0bf6e9..9295f2173 100644 --- a/engine/lib/output.php +++ b/engine/lib/output.php @@ -287,6 +287,8 @@ function elgg_get_friendly_title($title) { // handle some special cases $title = str_replace('&', 'and', $title); + // quotes and angle brackets stored in the database as html encoded + $title = htmlspecialchars_decode($title); $title = ElggTranslit::urlize($title); -- cgit v1.2.3