diff options
Diffstat (limited to 'lib/dokuwiki/inc/parser')
-rw-r--r-- | lib/dokuwiki/inc/parser/code.php | 58 | ||||
-rw-r--r-- | lib/dokuwiki/inc/parser/handler.php | 1697 | ||||
-rw-r--r-- | lib/dokuwiki/inc/parser/lexer.php | 600 | ||||
-rw-r--r-- | lib/dokuwiki/inc/parser/metadata.php | 483 | ||||
-rw-r--r-- | lib/dokuwiki/inc/parser/parser.php | 956 | ||||
-rw-r--r-- | lib/dokuwiki/inc/parser/renderer.php | 322 | ||||
-rw-r--r-- | lib/dokuwiki/inc/parser/xhtml.php | 1142 | ||||
-rw-r--r-- | lib/dokuwiki/inc/parser/xhtmlsummary.php | 90 |
8 files changed, 5348 insertions, 0 deletions
/**
 * A simple renderer that allows downloading of code and file snippets.
 *
 * Each code()/file() instruction of the page is counted; the block whose
 * zero-based position equals the "codeblock" request parameter is sent to the
 * browser as an attachment and the script exits. If the document ends without
 * a matching block, document_end() answers with a 404.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
class Doku_Renderer_code extends Doku_Renderer {
    // zero-based index of the code/file block that is visited next
    var $_codeblock=0;

    /**
     * Send the wanted code block to the browser
     *
     * When the correct block was found it exits the script.
     *
     * @param string      $text     content of the code block
     * @param string|null $language language name, also used as extension of the
     *                              default file name; falls back to 'txt'
     * @param string      $filename suggested download name; falls back to
     *                              'snippet.<language>'
     */
    function code($text, $language = NULL, $filename='' ) {
        if(!$language) $language = 'txt';
        if(!$filename) $filename = 'snippet.'.$language;
        $filename = basename($filename);
        // the name ends up inside a quoted header value: neutralise control
        // characters, double quotes and backslashes
        $filename = preg_replace('/[\x00-\x1f\x7f"\\\\]/', '_', $filename);

        // A missing parameter selects the first block (index 0). The value is
        // normalised to an integer so the comparison below does not depend on
        // PHP's version specific string/number comparison rules.
        $wanted = isset($_REQUEST['codeblock']) ? $_REQUEST['codeblock'] : 0;
        $wanted = is_scalar($wanted) ? (int) $wanted : -1;

        if($this->_codeblock == $wanted){
            header("Content-Type: text/plain; charset=utf-8");
            header('Content-Disposition: attachment; filename="'.$filename.'"');
            header("X-Robots-Tag: noindex");
            echo trim($text,"\r\n");
            exit;
        }

        $this->_codeblock++;
    }

    /**
     * Wraps around code()
     *
     * File blocks share the counter and the download logic of code blocks.
     */
    function file($text, $language = NULL, $filename='') {
        $this->code($text, $language, $filename);
    }

    /**
     * This should never be reached, if it is send a 404
     *
     * Reaching the end of the document means no block matched the request.
     */
    function document_end() {
        header("HTTP/1.0 404 Not Found");
        echo '404 - Not found';
        exit;
    }

    /**
     * Return the format of the renderer
     *
     * @returns string 'code'
     */
    function getFormat(){
        return 'code';
    }
}
var $status = array( + 'section' => false, + 'section_edit_start' => -1, + 'section_edit_level' => 1, + 'section_edit_title' => '' + ); + + var $rewriteBlocks = true; + + function Doku_Handler() { + $this->CallWriter = & new Doku_Handler_CallWriter($this); + } + + function _addCall($handler, $args, $pos) { + $call = array($handler,$args, $pos); + $this->CallWriter->writeCall($call); + } + + function addPluginCall($plugin, $args, $state, $pos, $match) { + $call = array('plugin',array($plugin, $args, $state, $match), $pos); + $this->CallWriter->writeCall($call); + } + + function _finalize(){ + + $this->CallWriter->finalise(); + + if ( $this->status['section'] ) { + $last_call = end($this->calls); + array_push($this->calls,array('section_close',array(), $last_call[2])); + if ($this->status['section_edit_start']>1) { + // ignore last edit section if there is only one header + array_push($this->calls,array('section_edit',array($this->status['section_edit_start'], 0, $this->status['section_edit_level'], $this->status['section_edit_title']), $last_call[2])); + } + } + + if ( $this->rewriteBlocks ) { + $B = & new Doku_Handler_Block(); + $this->calls = $B->process($this->calls); + } + + trigger_event('PARSER_HANDLER_DONE',$this); + + array_unshift($this->calls,array('document_start',array(),0)); + $last_call = end($this->calls); + array_push($this->calls,array('document_end',array(),$last_call[2])); + } + + function fetch() { + $call = each($this->calls); + if ( $call ) { + return $call['value']; + } + return false; + } + + + /** + * Special plugin handler + * + * This handler is called for all modes starting with 'plugin_'. 
+ * An additional parameter with the plugin name is passed + * + * @author Andreas Gohr <andi@splitbrain.org> + */ + function plugin($match, $state, $pos, $pluginname){ + $data = array($match); + $plugin =& plugin_load('syntax',$pluginname); + if($plugin != null){ + $data = $plugin->handle($match, $state, $pos, $this); + } + if ($data !== false) { + $this->addPluginCall($pluginname,$data,$state,$pos,$match); + } + return true; + } + + function base($match, $state, $pos) { + switch ( $state ) { + case DOKU_LEXER_UNMATCHED: + $this->_addCall('cdata',array($match), $pos); + return true; + break; + } + } + + function header($match, $state, $pos) { + global $conf; + + // get level and title + $title = trim($match); + $level = 7 - strspn($title,'='); + if($level < 1) $level = 1; + $title = trim($title,'='); + $title = trim($title); + + if ($this->status['section']) $this->_addCall('section_close',array(),$pos); + + if ($level<=$conf['maxseclevel']) { + $this->_addCall('section_edit',array($this->status['section_edit_start'], $pos-1, $this->status['section_edit_level'], $this->status['section_edit_title']), $pos); + $this->status['section_edit_start'] = $pos; + $this->status['section_edit_level'] = $level; + $this->status['section_edit_title'] = $title; + } + + $this->_addCall('header',array($title,$level,$pos), $pos); + + $this->_addCall('section_open',array($level),$pos); + $this->status['section'] = true; + return true; + } + + function notoc($match, $state, $pos) { + $this->_addCall('notoc',array(),$pos); + return true; + } + + function nocache($match, $state, $pos) { + $this->_addCall('nocache',array(),$pos); + return true; + } + + function linebreak($match, $state, $pos) { + $this->_addCall('linebreak',array(),$pos); + return true; + } + + function eol($match, $state, $pos) { + $this->_addCall('eol',array(),$pos); + return true; + } + + function hr($match, $state, $pos) { + $this->_addCall('hr',array(),$pos); + return true; + } + + function _nestingTag($match, 
$state, $pos, $name) { + switch ( $state ) { + case DOKU_LEXER_ENTER: + $this->_addCall($name.'_open', array(), $pos); + break; + case DOKU_LEXER_EXIT: + $this->_addCall($name.'_close', array(), $pos); + break; + case DOKU_LEXER_UNMATCHED: + $this->_addCall('cdata',array($match), $pos); + break; + } + } + + function strong($match, $state, $pos) { + $this->_nestingTag($match, $state, $pos, 'strong'); + return true; + } + + function emphasis($match, $state, $pos) { + $this->_nestingTag($match, $state, $pos, 'emphasis'); + return true; + } + + function underline($match, $state, $pos) { + $this->_nestingTag($match, $state, $pos, 'underline'); + return true; + } + + function monospace($match, $state, $pos) { + $this->_nestingTag($match, $state, $pos, 'monospace'); + return true; + } + + function subscript($match, $state, $pos) { + $this->_nestingTag($match, $state, $pos, 'subscript'); + return true; + } + + function superscript($match, $state, $pos) { + $this->_nestingTag($match, $state, $pos, 'superscript'); + return true; + } + + function deleted($match, $state, $pos) { + $this->_nestingTag($match, $state, $pos, 'deleted'); + return true; + } + + + function footnote($match, $state, $pos) { +// $this->_nestingTag($match, $state, $pos, 'footnote'); + if (!isset($this->_footnote)) $this->_footnote = false; + + switch ( $state ) { + case DOKU_LEXER_ENTER: + // footnotes can not be nested - however due to limitations in lexer it can't be prevented + // we will still enter a new footnote mode, we just do nothing + if ($this->_footnote) { + $this->_addCall('cdata',array($match), $pos); + break; + } + + $this->_footnote = true; + + $ReWriter = & new Doku_Handler_Nest($this->CallWriter,'footnote_close'); + $this->CallWriter = & $ReWriter; + $this->_addCall('footnote_open', array(), $pos); + break; + case DOKU_LEXER_EXIT: + // check whether we have already exitted the footnote mode, can happen if the modes were nested + if (!$this->_footnote) { + 
$this->_addCall('cdata',array($match), $pos); + break; + } + + $this->_footnote = false; + + $this->_addCall('footnote_close', array(), $pos); + $this->CallWriter->process(); + $ReWriter = & $this->CallWriter; + $this->CallWriter = & $ReWriter->CallWriter; + break; + case DOKU_LEXER_UNMATCHED: + $this->_addCall('cdata', array($match), $pos); + break; + } + return true; + } + + function listblock($match, $state, $pos) { + switch ( $state ) { + case DOKU_LEXER_ENTER: + $ReWriter = & new Doku_Handler_List($this->CallWriter); + $this->CallWriter = & $ReWriter; + $this->_addCall('list_open', array($match), $pos); + break; + case DOKU_LEXER_EXIT: + $this->_addCall('list_close', array(), $pos); + $this->CallWriter->process(); + $ReWriter = & $this->CallWriter; + $this->CallWriter = & $ReWriter->CallWriter; + break; + case DOKU_LEXER_MATCHED: + $this->_addCall('list_item', array($match), $pos); + break; + case DOKU_LEXER_UNMATCHED: + $this->_addCall('cdata', array($match), $pos); + break; + } + return true; + } + + function unformatted($match, $state, $pos) { + if ( $state == DOKU_LEXER_UNMATCHED ) { + $this->_addCall('unformatted',array($match), $pos); + } + return true; + } + + function php($match, $state, $pos) { + global $conf; + if ( $state == DOKU_LEXER_UNMATCHED ) { + $this->_addCall('php',array($match), $pos); + } + return true; + } + + function phpblock($match, $state, $pos) { + global $conf; + if ( $state == DOKU_LEXER_UNMATCHED ) { + $this->_addCall('phpblock',array($match), $pos); + } + return true; + } + + function html($match, $state, $pos) { + global $conf; + if ( $state == DOKU_LEXER_UNMATCHED ) { + $this->_addCall('html',array($match), $pos); + } + return true; + } + + function htmlblock($match, $state, $pos) { + global $conf; + if ( $state == DOKU_LEXER_UNMATCHED ) { + $this->_addCall('htmlblock',array($match), $pos); + } + return true; + } + + function preformatted($match, $state, $pos) { + switch ( $state ) { + case DOKU_LEXER_ENTER: + $ReWriter = & new 
Doku_Handler_Preformatted($this->CallWriter); + $this->CallWriter = & $ReWriter; + $this->_addCall('preformatted_start',array(), $pos); + break; + case DOKU_LEXER_EXIT: + $this->_addCall('preformatted_end',array(), $pos); + $this->CallWriter->process(); + $ReWriter = & $this->CallWriter; + $this->CallWriter = & $ReWriter->CallWriter; + break; + case DOKU_LEXER_MATCHED: + $this->_addCall('preformatted_newline',array(), $pos); + break; + case DOKU_LEXER_UNMATCHED: + $this->_addCall('preformatted_content',array($match), $pos); + break; + } + + return true; + } + + function quote($match, $state, $pos) { + + switch ( $state ) { + + case DOKU_LEXER_ENTER: + $ReWriter = & new Doku_Handler_Quote($this->CallWriter); + $this->CallWriter = & $ReWriter; + $this->_addCall('quote_start',array($match), $pos); + break; + + case DOKU_LEXER_EXIT: + $this->_addCall('quote_end',array(), $pos); + $this->CallWriter->process(); + $ReWriter = & $this->CallWriter; + $this->CallWriter = & $ReWriter->CallWriter; + break; + + case DOKU_LEXER_MATCHED: + $this->_addCall('quote_newline',array($match), $pos); + break; + + case DOKU_LEXER_UNMATCHED: + $this->_addCall('cdata',array($match), $pos); + break; + + } + + return true; + } + + function file($match, $state, $pos) { + return $this->code($match, $state, $pos, 'file'); + } + + function code($match, $state, $pos, $type='code') { + if ( $state == DOKU_LEXER_UNMATCHED ) { + $matches = explode('>',$match,2); + $matches[0] = trim($matches[0]); + + list($language,$filename) = explode(' ',$matches[0],2); + $language = trim($language); + $filename = trim($filename); + if ( $language == '' ) $language = null; + if ( $language == '-' ) $language = null; + if ( $filename == '' ) $filename = null; + # We shortcut html here. 
+ if($language == 'html') $language = 'html4strict'; + $this->_addCall( + $type, + array($matches[1],$language,$filename), + $pos + ); + } + return true; + } + + function acronym($match, $state, $pos) { + $this->_addCall('acronym',array($match), $pos); + return true; + } + + function smiley($match, $state, $pos) { + $this->_addCall('smiley',array($match), $pos); + return true; + } + + function wordblock($match, $state, $pos) { + $this->_addCall('wordblock',array($match), $pos); + return true; + } + + function entity($match, $state, $pos) { + $this->_addCall('entity',array($match), $pos); + return true; + } + + function multiplyentity($match, $state, $pos) { + preg_match_all('/\d+/',$match,$matches); + $this->_addCall('multiplyentity',array($matches[0][0],$matches[0][1]), $pos); + return true; + } + + function singlequoteopening($match, $state, $pos) { + $this->_addCall('singlequoteopening',array(), $pos); + return true; + } + + function singlequoteclosing($match, $state, $pos) { + $this->_addCall('singlequoteclosing',array(), $pos); + return true; + } + + function apostrophe($match, $state, $pos) { + $this->_addCall('apostrophe',array(), $pos); + return true; + } + + function doublequoteopening($match, $state, $pos) { + $this->_addCall('doublequoteopening',array(), $pos); + return true; + } + + function doublequoteclosing($match, $state, $pos) { + $this->_addCall('doublequoteclosing',array(), $pos); + return true; + } + + function camelcaselink($match, $state, $pos) { + $this->_addCall('camelcaselink',array($match), $pos); + return true; + } + + /* + */ + function internallink($match, $state, $pos) { + // Strip the opening and closing markup + $link = preg_replace(array('/^\[\[/','/\]\]$/u'),'',$match); + + // Split title from URL + $link = explode('|',$link,2); + if ( !isset($link[1]) ) { + $link[1] = NULL; + } else if ( preg_match('/^\{\{[^\}]+\}\}$/',$link[1]) ) { + // If the title is an image, convert it to an array containing the image details + $link[1] = 
Doku_Handler_Parse_Media($link[1]); + } + $link[0] = trim($link[0]); + + //decide which kind of link it is + + if ( preg_match('/^[a-zA-Z0-9\.]+>{1}.*$/u',$link[0]) ) { + // Interwiki + $interwiki = explode('>',$link[0],2); + $this->_addCall( + 'interwikilink', + array($link[0],$link[1],strtolower($interwiki[0]),$interwiki[1]), + $pos + ); + }elseif ( preg_match('/^\\\\\\\\[\w.:?\-;,]+?\\\\/u',$link[0]) ) { + // Windows Share + $this->_addCall( + 'windowssharelink', + array($link[0],$link[1]), + $pos + ); + }elseif ( preg_match('#^([a-z0-9\-\.+]+?)://#i',$link[0]) ) { + // external link (accepts all protocols) + $this->_addCall( + 'externallink', + array($link[0],$link[1]), + $pos + ); + }elseif ( preg_match('<'.PREG_PATTERN_VALID_EMAIL.'>',$link[0]) ) { + // E-Mail (pattern above is defined in inc/mail.php) + $this->_addCall( + 'emaillink', + array($link[0],$link[1]), + $pos + ); + }elseif ( preg_match('!^#.+!',$link[0]) ){ + // local link + $this->_addCall( + 'locallink', + array(substr($link[0],1),$link[1]), + $pos + ); + }else{ + // internal link + $this->_addCall( + 'internallink', + array($link[0],$link[1]), + $pos + ); + } + + return true; + } + + function filelink($match, $state, $pos) { + $this->_addCall('filelink',array($match, NULL), $pos); + return true; + } + + function windowssharelink($match, $state, $pos) { + $this->_addCall('windowssharelink',array($match, NULL), $pos); + return true; + } + + function media($match, $state, $pos) { + $p = Doku_Handler_Parse_Media($match); + + $this->_addCall( + $p['type'], + array($p['src'], $p['title'], $p['align'], $p['width'], + $p['height'], $p['cache'], $p['linking']), + $pos + ); + return true; + } + + function rss($match, $state, $pos) { + $link = preg_replace(array('/^\{\{rss>/','/\}\}$/'),'',$match); + + // get params + list($link,$params) = explode(' ',$link,2); + + $p = array(); + if(preg_match('/\b(\d+)\b/',$params,$match)){ + $p['max'] = $match[1]; + }else{ + $p['max'] = 8; + } + $p['reverse'] = 
(preg_match('/rev/',$params)); + $p['author'] = (preg_match('/\b(by|author)/',$params)); + $p['date'] = (preg_match('/\b(date)/',$params)); + $p['details'] = (preg_match('/\b(desc|detail)/',$params)); + + if (preg_match('/\b(\d+)([dhm])\b/',$params,$match)) { + $period = array('d' => 86400, 'h' => 3600, 'm' => 60); + $p['refresh'] = max(600,$match[1]*$period[$match[2]]); // n * period in seconds, minimum 10 minutes + } else { + $p['refresh'] = 14400; // default to 4 hours + } + + $this->_addCall('rss',array($link,$p),$pos); + return true; + } + + function externallink($match, $state, $pos) { + $url = $match; + $title = null; + + // add protocol on simple short URLs + if(substr($url,0,3) == 'ftp' && (substr($url,0,6) != 'ftp://')){ + $title = $url; + $url = 'ftp://'.$url; + } + if(substr($url,0,3) == 'www' && (substr($url,0,7) != 'http://')){ + $title = $url; + $url = 'http://'.$url; + } + + $this->_addCall('externallink',array($url, $title), $pos); + return true; + } + + function emaillink($match, $state, $pos) { + $email = preg_replace(array('/^</','/>$/'),'',$match); + $this->_addCall('emaillink',array($email, NULL), $pos); + return true; + } + + function table($match, $state, $pos) { + switch ( $state ) { + + case DOKU_LEXER_ENTER: + + $ReWriter = & new Doku_Handler_Table($this->CallWriter); + $this->CallWriter = & $ReWriter; + + $this->_addCall('table_start', array(), $pos); + if ( trim($match) == '^' ) { + $this->_addCall('tableheader', array(), $pos); + } else { + $this->_addCall('tablecell', array(), $pos); + } + break; + + case DOKU_LEXER_EXIT: + $this->_addCall('table_end', array(), $pos); + $this->CallWriter->process(); + $ReWriter = & $this->CallWriter; + $this->CallWriter = & $ReWriter->CallWriter; + break; + + case DOKU_LEXER_UNMATCHED: + if ( trim($match) != '' ) { + $this->_addCall('cdata',array($match), $pos); + } + break; + + case DOKU_LEXER_MATCHED: + if ( $match == ' ' ){ + $this->_addCall('cdata', array($match), $pos); + } else if ( 
/**
 * Parse a media match ({{src?params|title}}) into its components.
 *
 * @param string $match the complete media syntax including the braces
 * @return array with the keys
 *   type    - 'externalmedia' when src starts with http://, https:// or
 *             ftp://, otherwise 'internalmedia'
 *   src     - everything before the last '?'
 *   title   - text after the first '|', or null
 *   align   - 'center', 'right', 'left' or null, derived from a leading
 *             and/or trailing space around the source
 *   width   - first number found in the parameters, or null
 *   height  - number following "<width>x", or null
 *   cache   - 'nocache', 'recache' or the default 'cache'
 *   linking - 'nolink', 'direct', 'linkonly' or the default 'details'
 */
function Doku_Handler_Parse_Media($match) {

    // Strip the opening and closing markup
    $link = preg_replace(array('/^\{\{/','/\}\}$/u'),'',$match);

    // Split title from URL
    $link = explode('|',$link,2);

    // Check alignment: a leading space pushes the media to the right,
    // a trailing space to the left, both centre it
    $ralign = (bool)preg_match('/^ /',$link[0]);
    $lalign = (bool)preg_match('/ $/',$link[0]);

    if ( $lalign && $ralign ) {
        $align = 'center';
    } else if ( $ralign ) {
        $align = 'right';
    } else if ( $lalign ) {
        $align = 'left';
    } else {
        $align = NULL;
    }

    // The title...
    if ( !isset($link[1]) ) {
        $link[1] = NULL;
    }

    //remove aligning spaces
    $link[0] = trim($link[0]);

    //split into src and parameters (using the very last questionmark)
    $pos = strrpos($link[0], '?');
    if($pos !== false){
        $src   = substr($link[0],0,$pos);
        $param = substr($link[0],$pos+1);
    }else{
        $src   = $link[0];
        $param = '';
    }

    //parse width and height
    $w = NULL;
    $h = NULL;
    if(preg_match('#(\d+)(x(\d+))?#i',$param,$size)){
        if ( !empty($size[1]) ) $w = $size[1];
        // group 3 is absent from $size when only a width was given
        if ( !empty($size[3]) ) $h = $size[3];
    }

    //get linking command
    if(preg_match('/nolink/i',$param)){
        $linking = 'nolink';
    }else if(preg_match('/direct/i',$param)){
        $linking = 'direct';
    }else if(preg_match('/linkonly/i',$param)){
        $linking = 'linkonly';
    }else{
        $linking = 'details';
    }

    //get caching command
    if (preg_match('/(nocache|recache)/i',$param,$cachemode)){
        $cache = $cachemode[1];
    }else{
        $cache = 'cache';
    }

    // Check whether this is a local or remote image. The scheme separator is
    // required so that local names which merely start with "http" or "ftp"
    // are not mistaken for URLs.
    if ( preg_match('#^(https?|ftp)://#i',$src) ) {
        $call = 'externalmedia';
    } else {
        $call = 'internalmedia';
    }

    $params = array(
        'type'=>$call,
        'src'=>$src,
        'title'=>$link[1],
        'align'=>$align,
        'width'=>$w,
        'height'=>$h,
        'cache'=>$cache,
        'linking'=>$linking,
    );

    return $params;
}
/**
 * Generic call writer class to handle nesting of rendering instructions
 * within a render instruction. Also see nest() method of renderer base class
 *
 * Calls are buffered until process() is invoked; they are then passed to the
 * parent call writer as a single 'nest' call.
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 */
class Doku_Handler_Nest {

    // parent call writer that receives the resulting 'nest' call
    var $CallWriter;
    // buffered calls
    var $calls = array();

    // instruction appended by finalise() to terminate the nested mode
    var $closingInstruction;

    /**
     * constructor
     *
     * @param  object     $CallWriter     the renderers current call writer
     * @param  string     $close          closing instruction name, this is required to properly terminate the
     *                                    syntax mode if the document ends without a closing pattern
     */
    function __construct(& $CallWriter, $close="nest_close") {
        $this->CallWriter = & $CallWriter;

        $this->closingInstruction = $close;
    }

    /**
     * Old style constructor, kept for code that calls it by name.
     *
     * PHP 8 no longer treats a method named like the class as constructor,
     * so the real work lives in __construct().
     */
    function Doku_Handler_Nest(& $CallWriter, $close="nest_close") {
        $this->__construct($CallWriter, $close);
    }

    /** Buffer a single call. */
    function writeCall($call) {
        $this->calls[] = $call;
    }

    /** Buffer a list of calls. */
    function writeCalls($calls) {
        $this->calls = array_merge($this->calls, $calls);
    }

    /**
     * Terminate a nest that is still open at the end of the document: append
     * the closing instruction, flush to the parent writer and finalise it.
     */
    function finalise() {
        $last_call = end($this->calls);
        // no buffered call means there is no position to inherit
        $pos = $last_call ? $last_call[2] : null;
        $this->writeCall(array($this->closingInstruction,array(), $pos));

        $this->process();
        $this->CallWriter->finalise();
    }

    /**
     * Normalise the buffered calls through addCall() and hand them to the
     * parent writer as one 'nest' call positioned at the first buffered call.
     */
    function process() {
        // merge consecutive cdata
        $unmerged_calls = $this->calls;
        $this->calls = array();

        foreach ($unmerged_calls as $call) $this->addCall($call);

        $first_call = reset($this->calls);
        $pos = $first_call ? $first_call[2] : null;
        $this->CallWriter->writeCall(array("nest", array($this->calls), $pos));
    }

    /**
     * Append a call to the normalised list: consecutive cdata calls are
     * merged into one and eol calls are dropped.
     */
    function addCall($call) {
        $key = count($this->calls);
        if ($key and ($call[0] == 'cdata') and ($this->calls[$key-1][0] == 'cdata')) {
            $this->calls[$key-1][1][0] .= $call[1][0];
        } else if ($call[0] == 'eol') {
            // do nothing (eol shouldn't be allowed, to counter preformatted fix in #1652 & #1699)
        } else {
            $this->calls[] = $call;
        }
    }
}
+ function writeCalls($calls) { + $this->calls = array_merge($this->calls, $calls); +# $this->CallWriter->writeCalls($this->calls); + } + + function finalise() { + $last_call = end($this->calls); + $this->writeCall(array('list_close',array(), $last_call[2])); + + $this->process(); + $this->CallWriter->finalise(); + } + + //------------------------------------------------------------------------ + function process() { + + foreach ( $this->calls as $call ) { + switch ($call[0]) { + case 'list_item': + $this->listOpen($call); + break; + case 'list_open': + $this->listStart($call); + break; + case 'list_close': + $this->listEnd($call); + break; + default: + $this->listContent($call); + break; + } + } + + $this->CallWriter->writeCalls($this->listCalls); + } + + //------------------------------------------------------------------------ + function listStart($call) { + $depth = $this->interpretSyntax($call[1][0], $listType); + + $this->initialDepth = $depth; + $this->listStack[] = array($listType, $depth); + + $this->listCalls[] = array('list'.$listType.'_open',array(),$call[2]); + $this->listCalls[] = array('listitem_open',array(1),$call[2]); + $this->listCalls[] = array('listcontent_open',array(),$call[2]); + } + + //------------------------------------------------------------------------ + function listEnd($call) { + $closeContent = true; + + while ( $list = array_pop($this->listStack) ) { + if ( $closeContent ) { + $this->listCalls[] = array('listcontent_close',array(),$call[2]); + $closeContent = false; + } + $this->listCalls[] = array('listitem_close',array(),$call[2]); + $this->listCalls[] = array('list'.$list[0].'_close', array(), $call[2]); + } + } + + //------------------------------------------------------------------------ + function listOpen($call) { + $depth = $this->interpretSyntax($call[1][0], $listType); + $end = end($this->listStack); + + // Not allowed to be shallower than initialDepth + if ( $depth < $this->initialDepth ) { + $depth = 
$this->initialDepth; + } + + //------------------------------------------------------------------------ + if ( $depth == $end[1] ) { + + // Just another item in the list... + if ( $listType == $end[0] ) { + $this->listCalls[] = array('listcontent_close',array(),$call[2]); + $this->listCalls[] = array('listitem_close',array(),$call[2]); + $this->listCalls[] = array('listitem_open',array($depth-1),$call[2]); + $this->listCalls[] = array('listcontent_open',array(),$call[2]); + + // Switched list type... + } else { + + $this->listCalls[] = array('listcontent_close',array(),$call[2]); + $this->listCalls[] = array('listitem_close',array(),$call[2]); + $this->listCalls[] = array('list'.$end[0].'_close', array(), $call[2]); + $this->listCalls[] = array('list'.$listType.'_open', array(), $call[2]); + $this->listCalls[] = array('listitem_open', array($depth-1), $call[2]); + $this->listCalls[] = array('listcontent_open',array(),$call[2]); + + array_pop($this->listStack); + $this->listStack[] = array($listType, $depth); + } + + //------------------------------------------------------------------------ + // Getting deeper... 
+ } else if ( $depth > $end[1] ) { + + $this->listCalls[] = array('listcontent_close',array(),$call[2]); + $this->listCalls[] = array('list'.$listType.'_open', array(), $call[2]); + $this->listCalls[] = array('listitem_open', array($depth-1), $call[2]); + $this->listCalls[] = array('listcontent_open',array(),$call[2]); + + $this->listStack[] = array($listType, $depth); + + //------------------------------------------------------------------------ + // Getting shallower ( $depth < $end[1] ) + } else { + $this->listCalls[] = array('listcontent_close',array(),$call[2]); + $this->listCalls[] = array('listitem_close',array(),$call[2]); + $this->listCalls[] = array('list'.$end[0].'_close',array(),$call[2]); + + // Throw away the end - done + array_pop($this->listStack); + + while (1) { + $end = end($this->listStack); + + if ( $end[1] <= $depth ) { + + // Normalize depths + $depth = $end[1]; + + $this->listCalls[] = array('listitem_close',array(),$call[2]); + + if ( $end[0] == $listType ) { + $this->listCalls[] = array('listitem_open',array($depth-1),$call[2]); + $this->listCalls[] = array('listcontent_open',array(),$call[2]); + + } else { + // Switching list type... + $this->listCalls[] = array('list'.$end[0].'_close', array(), $call[2]); + $this->listCalls[] = array('list'.$listType.'_open', array(), $call[2]); + $this->listCalls[] = array('listitem_open', array($depth-1), $call[2]); + $this->listCalls[] = array('listcontent_open',array(),$call[2]); + + array_pop($this->listStack); + $this->listStack[] = array($listType, $depth); + } + + break; + + // Haven't dropped down far enough yet.... 
( $end[1] > $depth ) + } else { + + $this->listCalls[] = array('listitem_close',array(),$call[2]); + $this->listCalls[] = array('list'.$end[0].'_close',array(),$call[2]); + + array_pop($this->listStack); + + } + + } + + } + } + + //------------------------------------------------------------------------ + function listContent($call) { + $this->listCalls[] = $call; + } + + //------------------------------------------------------------------------ + function interpretSyntax($match, & $type) { + if ( substr($match,-1) == '*' ) { + $type = 'u'; + } else { + $type = 'o'; + } + // Is the +1 needed? It used to be count(explode(...)) + // but I don't think the number is seen outside this handler + return substr_count(str_replace("\t",' ',$match), ' ') + 1; + } +} + +//------------------------------------------------------------------------ +class Doku_Handler_Preformatted { + + var $CallWriter; + + var $calls = array(); + var $pos; + var $text =''; + + + + function Doku_Handler_Preformatted(& $CallWriter) { + $this->CallWriter = & $CallWriter; + } + + function writeCall($call) { + $this->calls[] = $call; + } + + // Probably not needed but just in case... 
/**
 * Call writer that turns the flat quote_start / quote_newline / quote_end
 * calls into properly nested quote_open / quote_close calls.
 */
class Doku_Handler_Quote {

    // parent call writer that receives the rewritten calls
    var $CallWriter;

    // buffered raw calls
    var $calls = array();

    // rewritten calls, filled by process()
    var $quoteCalls = array();

    /**
     * constructor
     *
     * @param object $CallWriter the call writer to hand the result to
     */
    function __construct(& $CallWriter) {
        $this->CallWriter = & $CallWriter;
    }

    /**
     * Old style constructor, kept for code that calls it by name.
     *
     * PHP 8 no longer treats a method named like the class as constructor,
     * so the real work lives in __construct().
     */
    function Doku_Handler_Quote(& $CallWriter) {
        $this->__construct($CallWriter);
    }

    /** Buffer a single call. */
    function writeCall($call) {
        $this->calls[] = $call;
    }

    // Probably not needed but just in case...
    function writeCalls($calls) {
        $this->calls = array_merge($this->calls, $calls);
    }

    /**
     * Terminate a quote that is still open at the end of the document: append
     * quote_end, rewrite the buffer and finalise the parent writer.
     */
    function finalise() {
        $last_call = end($this->calls);
        // no buffered call means there is no position to inherit
        $pos = $last_call ? $last_call[2] : null;
        $this->writeCall(array('quote_end',array(), $pos));

        $this->process();
        $this->CallWriter->finalise();
    }

    /**
     * Rewrite the buffered calls.
     *
     * The depth of each quote line is the length of its '>' run. A deeper
     * line opens the missing levels, a shallower one closes the surplus
     * levels, and a line of equal depth (other than the first) becomes a
     * linebreak. quote_end closes every open level and passes the result to
     * the parent writer.
     */
    function process() {

        $quoteDepth = 1;

        foreach ( $this->calls as $call ) {
            switch ($call[0]) {

                case 'quote_start':

                    $this->quoteCalls[] = array('quote_open',array(),$call[2]);
                    // deliberate fall through: the opening marker may already
                    // be deeper than one level

                case 'quote_newline':

                    $quoteLength = $this->getDepth($call[1][0]);

                    if ( $quoteLength > $quoteDepth ) {
                        $quoteDiff = $quoteLength - $quoteDepth;
                        for ( $i = 1; $i <= $quoteDiff; $i++ ) {
                            $this->quoteCalls[] = array('quote_open',array(),$call[2]);
                        }
                    } else if ( $quoteLength < $quoteDepth ) {
                        $quoteDiff = $quoteDepth - $quoteLength;
                        for ( $i = 1; $i <= $quoteDiff; $i++ ) {
                            $this->quoteCalls[] = array('quote_close',array(),$call[2]);
                        }
                    } else {
                        if ($call[0] != 'quote_start') $this->quoteCalls[] = array('linebreak',array(),$call[2]);
                    }

                    $quoteDepth = $quoteLength;

                break;

                case 'quote_end':

                    if ( $quoteDepth > 1 ) {
                        $quoteDiff = $quoteDepth - 1;
                        for ( $i = 1; $i <= $quoteDiff; $i++ ) {
                            $this->quoteCalls[] = array('quote_close',array(),$call[2]);
                        }
                    }

                    $this->quoteCalls[] = array('quote_close',array(),$call[2]);

                    $this->CallWriter->writeCalls($this->quoteCalls);
                break;

                default:
                    $this->quoteCalls[] = $call;
                break;
            }
        }
    }

    /**
     * Length of the first run of '>' characters in the marker.
     *
     * @param string $marker matched quote marker
     * @return int 0 when the marker contains no '>'
     */
    function getDepth($marker) {
        if ( !preg_match('/>{1,}/', $marker, $matches) ) {
            return 0;
        }
        return strlen($matches[0]);
    }
}
$CallWriter; + } + + function writeCall($call) { + $this->calls[] = $call; + } + + // Probably not needed but just in case... + function writeCalls($calls) { + $this->calls = array_merge($this->calls, $calls); + } + + function finalise() { + $last_call = end($this->calls); + $this->writeCall(array('table_end',array(), $last_call[2])); + + $this->process(); + $this->CallWriter->finalise(); + } + + //------------------------------------------------------------------------ + function process() { + foreach ( $this->calls as $call ) { + switch ( $call[0] ) { + case 'table_start': + $this->tableStart($call); + break; + case 'table_row': + $this->tableRowClose(array('tablerow_close',$call[1],$call[2])); + $this->tableRowOpen(array('tablerow_open',$call[1],$call[2])); + break; + case 'tableheader': + case 'tablecell': + $this->tableCell($call); + break; + case 'table_end': + $this->tableRowClose(array('tablerow_close',$call[1],$call[2])); + $this->tableEnd($call); + break; + default: + $this->tableDefault($call); + break; + } + } + $this->CallWriter->writeCalls($this->tableCalls); + } + + function tableStart($call) { + $this->tableCalls[] = array('table_open',array(),$call[2]); + $this->tableCalls[] = array('tablerow_open',array(),$call[2]); + $this->firstCell = true; + } + + function tableEnd($call) { + $this->tableCalls[] = array('table_close',array(),$call[2]); + $this->finalizeTable(); + } + + function tableRowOpen($call) { + $this->tableCalls[] = $call; + $this->currentCols = 0; + $this->firstCell = true; + $this->lastCellType = 'tablecell'; + $this->maxRows++; + } + + function tableRowClose($call) { + // Strip off final cell opening and anything after it + while ( $discard = array_pop($this->tableCalls ) ) { + + if ( $discard[0] == 'tablecell_open' || $discard[0] == 'tableheader_open') { + break; + } + } + $this->tableCalls[] = $call; + + if ( $this->currentCols > $this->maxCols ) { + $this->maxCols = $this->currentCols; + } + } + + function tableCell($call) { + if ( 
!$this->firstCell ) { + + // Increase the span + $lastCall = end($this->tableCalls); + + // A cell call which follows an open cell means an empty cell so span + if ( $lastCall[0] == 'tablecell_open' || $lastCall[0] == 'tableheader_open' ) { + $this->tableCalls[] = array('colspan',array(),$call[2]); + + } + + $this->tableCalls[] = array($this->lastCellType.'_close',array(),$call[2]); + $this->tableCalls[] = array($call[0].'_open',array(1,NULL,1),$call[2]); + $this->lastCellType = $call[0]; + + } else { + + $this->tableCalls[] = array($call[0].'_open',array(1,NULL,1),$call[2]); + $this->lastCellType = $call[0]; + $this->firstCell = false; + + } + + $this->currentCols++; + } + + function tableDefault($call) { + $this->tableCalls[] = $call; + } + + function finalizeTable() { + + // Add the max cols and rows to the table opening + if ( $this->tableCalls[0][0] == 'table_open' ) { + // Adjust to num cols not num col delimeters + $this->tableCalls[0][1][] = $this->maxCols - 1; + $this->tableCalls[0][1][] = $this->maxRows; + } else { + trigger_error('First element in table call list is not table_open'); + } + + $lastRow = 0; + $lastCell = 0; + $cellKey = array(); + $toDelete = array(); + + // Look for the colspan elements and increment the colspan on the + // previous non-empty opening cell. 
Once done, delete all the cells + // that contain colspans + foreach ( $this->tableCalls as $key => $call ) { + + if ( $call[0] == 'tablerow_open' ) { + + $lastRow++; + $lastCell = 0; + + } else if ( $call[0] == 'tablecell_open' || $call[0] == 'tableheader_open' ) { + + $lastCell++; + $cellKey[$lastRow][$lastCell] = $key; + + } else if ( $call[0] == 'table_align' ) { + + $prev = in_array($this->tableCalls[$key-1][0], array('tablecell_open', 'tableheader_open')); + $next = in_array($this->tableCalls[$key+1][0], array('tablecell_close', 'tableheader_close')); + // If the cell is empty, align left + if ($prev && $next) { + $this->tableCalls[$key-1][1][1] = 'left'; + + // If the previous element was a cell open, align right + } elseif ($prev) { + $this->tableCalls[$key-1][1][1] = 'right'; + + // If the next element is the close of an element, align either center or left + } elseif ( $next) { + if ( $this->tableCalls[$cellKey[$lastRow][$lastCell]][1][1] == 'right' ) { + $this->tableCalls[$cellKey[$lastRow][$lastCell]][1][1] = 'center'; + } else { + $this->tableCalls[$cellKey[$lastRow][$lastCell]][1][1] = 'left'; + } + + } + + // Now convert the whitespace back to cdata + $this->tableCalls[$key][0] = 'cdata'; + + } else if ( $call[0] == 'colspan' ) { + + $this->tableCalls[$key-1][1][0] = false; + + for($i = $key-2; $i >= $cellKey[$lastRow][1]; $i--) { + + if ( $this->tableCalls[$i][0] == 'tablecell_open' || $this->tableCalls[$i][0] == 'tableheader_open' ) { + + if ( false !== $this->tableCalls[$i][1][0] ) { + $this->tableCalls[$i][1][0]++; + break; + } + + + } + } + + $toDelete[] = $key-1; + $toDelete[] = $key; + $toDelete[] = $key+1; + + } else if ( $call[0] == 'rowspan' ) { + + if ( $this->tableCalls[$key-1][0] == 'cdata' ) { + // ignore rowspan if previous call was cdata (text mixed with :::) we don't have to check next call as that wont match regex + $this->tableCalls[$key][0] = 'cdata'; + + } else { + + $this->tableCalls[$key-1][1][2] = false; + + for($i = 
$lastRow-1; $i > 0; $i--) { + + if ( $this->tableCalls[$cellKey[$i][$lastCell]][0] == 'tablecell_open' || $this->tableCalls[$cellKey[$i][$lastCell]][0] == 'tableheader_open' ) { + + if ( false !== $this->tableCalls[$cellKey[$i][$lastCell]][1][2] ) { + $this->tableCalls[$cellKey[$i][$lastCell]][1][2]++; + break; + } + + + } + } + + $toDelete[] = $key-1; + $toDelete[] = $key; + $toDelete[] = $key+1; + } + } + } + + + // condense cdata + $cnt = count($this->tableCalls); + for( $key = 0; $key < $cnt; $key++){ + if($this->tableCalls[$key][0] == 'cdata'){ + $ckey = $key; + $key++; + while($this->tableCalls[$key][0] == 'cdata'){ + $this->tableCalls[$ckey][1][0] .= $this->tableCalls[$key][1][0]; + $toDelete[] = $key; + $key++; + } + continue; + } + } + + foreach ( $toDelete as $delete ) { + unset($this->tableCalls[$delete]); + } + $this->tableCalls = array_values($this->tableCalls); + } +} + + +/** + * Handler for paragraphs + * + * @author Harry Fuecks <hfuecks@gmail.com> + */ +class Doku_Handler_Block { + + var $calls = array(); + + var $blockStack = array(); + + var $inParagraph = false; + var $atStart = true; + var $skipEolKey = -1; + + // Blocks these should not be inside paragraphs + var $blockOpen = array( + 'header', + 'listu_open','listo_open','listitem_open','listcontent_open', + 'table_open','tablerow_open','tablecell_open','tableheader_open', + 'quote_open', + 'section_open', // Needed to prevent p_open between header and section_open + 'code','file','hr','preformatted','rss', + 'htmlblock','phpblock', + ); + + var $blockClose = array( + 'header', + 'listu_close','listo_close','listitem_close','listcontent_close', + 'table_close','tablerow_close','tablecell_close','tableheader_close', + 'quote_close', + 'section_close', // Needed to prevent p_close after section_close + 'code','file','hr','preformatted','rss', + 'htmlblock','phpblock', + ); + + // Stacks can contain paragraphs + var $stackOpen = array( + 'footnote_open','section_open', + ); + + var $stackClose 
= array( + 'footnote_close','section_close', + ); + + + /** + * Constructor. Adds loaded syntax plugins to the block and stack + * arrays + * + * @author Andreas Gohr <andi@splitbrain.org> + */ + function Doku_Handler_Block(){ + global $DOKU_PLUGINS; + //check if syntax plugins were loaded + if(empty($DOKU_PLUGINS['syntax'])) return; + foreach($DOKU_PLUGINS['syntax'] as $n => $p){ + $ptype = $p->getPType(); + if($ptype == 'block'){ + $this->blockOpen[] = 'plugin_'.$n; + $this->blockClose[] = 'plugin_'.$n; + }elseif($ptype == 'stack'){ + $this->stackOpen[] = 'plugin_'.$n; + $this->stackClose[] = 'plugin_'.$n; + } + } + } + + /** + * Close a paragraph if needed + * + * This function makes sure there are no empty paragraphs on the stack + * + * @author Andreas Gohr <andi@splitbrain.org> + */ + function closeParagraph($pos){ + // look back if there was any content - we don't want empty paragraphs + $content = ''; + for($i=count($this->calls)-1; $i>=0; $i--){ + if($this->calls[$i][0] == 'p_open'){ + break; + }elseif($this->calls[$i][0] == 'cdata'){ + $content .= $this->calls[$i][1][0]; + }else{ + $content = 'found markup'; + break; + } + } + + if(trim($content)==''){ + //remove the whole paragraph + array_splice($this->calls,$i); + }else{ + if ($this->calls[count($this->calls)-1][0] == 'section_edit') { + $tmp = array_pop($this->calls); + $this->calls[] = array('p_close',array(), $pos); + $this->calls[] = $tmp; + } else { + $this->calls[] = array('p_close',array(), $pos); + } + } + + $this->inParagraph = false; + } + + /** + * Processes the whole instruction stack to open and close paragraphs + * + * @author Harry Fuecks <hfuecks@gmail.com> + * @author Andreas Gohr <andi@splitbrain.org> + * @todo This thing is really messy and should be rewritten + */ + function process($calls) { + foreach ( $calls as $key => $call ) { + $cname = $call[0]; + if($cname == 'plugin') { + $cname='plugin_'.$call[1][0]; + + $plugin = true; + $plugin_open = (($call[1][2] == DOKU_LEXER_ENTER) 
|| ($call[1][2] == DOKU_LEXER_SPECIAL)); + $plugin_close = (($call[1][2] == DOKU_LEXER_EXIT) || ($call[1][2] == DOKU_LEXER_SPECIAL)); + } else { + $plugin = false; + } + + // Process blocks which are stack like... (contain linefeeds) + if ( in_array($cname,$this->stackOpen ) && (!$plugin || $plugin_open) ) { + + $this->calls[] = $call; + + // Hack - footnotes shouldn't immediately contain a p_open + if ( $cname != 'footnote_open' ) { + $this->addToStack(); + } else { + $this->addToStack(false); + } + continue; + } + + if ( in_array($cname,$this->stackClose ) && (!$plugin || $plugin_close)) { + + if ( $this->inParagraph ) { + $this->closeParagraph($call[2]); + } + $this->calls[] = $call; + $this->removeFromStack(); + continue; + } + + if ( !$this->atStart ) { + + if ( $cname == 'eol' ) { + + // Check this isn't an eol instruction to skip... + if ( $this->skipEolKey != $key ) { + // Look to see if the next instruction is an EOL + if ( isset($calls[$key+1]) && $calls[$key+1][0] == 'eol' ) { + + if ( $this->inParagraph ) { + //$this->calls[] = array('p_close',array(), $call[2]); + $this->closeParagraph($call[2]); + } + + $this->calls[] = array('p_open',array(), $call[2]); + $this->inParagraph = true; + + + // Mark the next instruction for skipping + $this->skipEolKey = $key+1; + + }else{ + //if this is just a single eol make a space from it + $this->addCall(array('cdata',array(DOKU_PARSER_EOL), $call[2])); + } + } + + + } else { + + $storeCall = true; + if ( $this->inParagraph && (in_array($cname, $this->blockOpen) && (!$plugin || $plugin_open))) { + $this->closeParagraph($call[2]); + $this->calls[] = $call; + $storeCall = false; + } + + if ( in_array($cname, $this->blockClose) && (!$plugin || $plugin_close)) { + if ( $this->inParagraph ) { + $this->closeParagraph($call[2]); + } + if ( $storeCall ) { + $this->calls[] = $call; + $storeCall = false; + } + + // This really sucks and suggests this whole class sucks but... 
+ if ( isset($calls[$key+1])) { + $cname_plusone = $calls[$key+1][0]; + if ($cname_plusone == 'plugin') { + $cname_plusone = 'plugin_'.$calls[$key+1][1][0]; + + // plugin test, true if the next plugin call is in a MATCHED or UNMATCHED state, which precludes it requiring blockOpen or blockClose + $plugin_plusone = true; + $plugin_test = ($calls[$key+1][1][2] == DOKU_LEXER_MATCHED) || ($calls[$key+1][1][2] == DOKU_LEXER_UNMATCHED); + } else { + $plugin_plusone = false; + } + if ((!in_array($cname_plusone, $this->blockOpen) && !in_array($cname_plusone, $this->blockClose)) || + ($plugin_plusone && $plugin_test) + ) { + + $this->calls[] = array('p_open',array(), $call[2]); + $this->inParagraph = true; + } + } + } + + if ( $storeCall ) { + $this->addCall($call); + } + + } + + + } else { + + // Unless there's already a block at the start, start a paragraph + if ( !in_array($cname,$this->blockOpen) ) { + $this->calls[] = array('p_open',array(), $call[2]); + if ( $call[0] != 'eol' ) { + $this->calls[] = $call; + } + $this->atStart = false; + $this->inParagraph = true; + } else { + $this->addCall($call); + $this->atStart = false; + } + + } + + } + + if ( $this->inParagraph ) { + if ( $cname == 'p_open' ) { + // Ditch the last call + array_pop($this->calls); + } else if ( !in_array($cname, $this->blockClose) ) { + //$this->calls[] = array('p_close',array(), $call[2]); + $this->closeParagraph($call[2]); + } else { + $last_call = array_pop($this->calls); + //$this->calls[] = array('p_close',array(), $call[2]); + $this->closeParagraph($call[2]); + $this->calls[] = $last_call; + } + } + + return $this->calls; + } + + function addToStack($newStart = true) { + $this->blockStack[] = array($this->atStart, $this->inParagraph); + $this->atStart = $newStart; + $this->inParagraph = false; + } + + function removeFromStack() { + $state = array_pop($this->blockStack); + $this->atStart = $state[0]; + $this->inParagraph = $state[1]; + } + + function addCall($call) { + $key = count($this->calls); + if ($key and ($call[0] == 'cdata') 
and ($this->calls[$key-1][0] == 'cdata')) { + $this->calls[$key-1][1][0] .= $call[1][0]; + } else { + $this->calls[] = $call; + } + } +} + +//Setup VIM: ex: et ts=4 enc=utf-8 : diff --git a/lib/dokuwiki/inc/parser/lexer.php b/lib/dokuwiki/inc/parser/lexer.php new file mode 100644 index 000000000..afd260a05 --- /dev/null +++ b/lib/dokuwiki/inc/parser/lexer.php @@ -0,0 +1,600 @@ +<?php +/** +* Author Markus Baker: http://www.lastcraft.com +* Version adapted from Simple Test: http://sourceforge.net/projects/simpletest/ +* For an intro to the Lexer see: +* http://www.phppatterns.com/index.php/article/articleview/106/1/2/ +* @author Marcus Baker +* @package Doku +* @subpackage Lexer +* @version $Id: lexer.php,v 1.1 2005/03/23 23:14:09 harryf Exp $ +*/ + +/** +* Init path constant +*/ +if(!defined('DOKU_INC')) die('meh.'); + +/**#@+ + * lexer mode constant + */ +define("DOKU_LEXER_ENTER", 1); +define("DOKU_LEXER_MATCHED", 2); +define("DOKU_LEXER_UNMATCHED", 3); +define("DOKU_LEXER_EXIT", 4); +define("DOKU_LEXER_SPECIAL", 5); +/**#@-*/ + +/** + * Compounded regular expression. Any of + * the contained patterns could match and + * when one does it's label is returned. + * @package Doku + * @subpackage Lexer + */ +class Doku_LexerParallelRegex { + var $_patterns; + var $_labels; + var $_regex; + var $_case; + + /** + * Constructor. Starts with no patterns. + * @param boolean $case True for case sensitive, false + * for insensitive. + * @access public + */ + function Doku_LexerParallelRegex($case) { + $this->_case = $case; + $this->_patterns = array(); + $this->_labels = array(); + $this->_regex = null; + } + + /** + * Adds a pattern with an optional label. + * @param mixed $pattern Perl style regex. Must be UTF-8 + * encoded. If its a string, the (, ) + * lose their meaning unless they + * form part of a lookahead or + * lookbehind assertation. + * @param string $label Label of regex to be returned + * on a match. 
Label must be ASCII + * @access public + */ + function addPattern($pattern, $label = true) { + $count = count($this->_patterns); + $this->_patterns[$count] = $pattern; + $this->_labels[$count] = $label; + $this->_regex = null; + } + + /** + * Attempts to match all patterns at once against + * a string. + * @param string $subject String to match against. + * @param string $match First matched portion of + * subject. + * @return boolean True on success. + * @access public + */ + function match($subject, &$match) { + if (count($this->_patterns) == 0) { + return false; + } + if (! preg_match($this->_getCompoundedRegex(), $subject, $matches)) { + $match = ""; + return false; + } + + $match = $matches[0]; + $size = count($matches); + for ($i = 1; $i < $size; $i++) { + if ($matches[$i] && isset($this->_labels[$i - 1])) { + return $this->_labels[$i - 1]; + } + } + return true; + } + + /** + * Attempts to split the string against all patterns at once + * + * @param string $subject String to match against. + * @param array $split The split result: array containing, pre-match, match & post-match strings + * @return boolean True on success. + * @access public + * + * @author Christopher Smith <chris@jalakai.co.uk> + */ + function split($subject, &$split) { + if (count($this->_patterns) == 0) { + return false; + } + + if (! preg_match($this->_getCompoundedRegex(), $subject, $matches)) { + if(function_exists('preg_last_error')){ + $err = preg_last_error(); + switch($err){ + case PREG_BACKTRACK_LIMIT_ERROR: + msg('A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini',-1); + break; + case PREG_RECURSION_LIMIT_ERROR: + msg('A PCRE recursion error occured. Try to increase the pcre.recursion_limit in php.ini',-1); + break; + case PREG_BAD_UTF8_ERROR: + msg('A PCRE UTF-8 error occured. This might be caused by a faulty plugin',-1); + break; + case PREG_INTERNAL_ERROR: + msg('A PCRE internal error occured. 
This might be caused by a faulty plugin',-1); + break; + } + } + + $split = array($subject, "", ""); + return false; + } + + $idx = count($matches)-2; + list($pre, $post) = preg_split($this->_patterns[$idx].$this->_getPerlMatchingFlags(), $subject, 2); + $split = array($pre, $matches[0], $post); + + return isset($this->_labels[$idx]) ? $this->_labels[$idx] : true; + } + + /** + * Compounds the patterns into a single + * regular expression separated with the + * "or" operator. Caches the regex. + * Will automatically escape (, ) and / tokens. + * @param array $patterns List of patterns in order. + * @access private + */ + function _getCompoundedRegex() { + if ($this->_regex == null) { + $cnt = count($this->_patterns); + for ($i = 0; $i < $cnt; $i++) { + + /* + * decompose the input pattern into "(", "(?", ")", + * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"... + * elements. + */ + preg_match_all('/\\\\.|' . + '\(\?|' . + '[()]|' . + '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' . + '[^[()\\\\]+/', $this->_patterns[$i], $elts); + + $pattern = ""; + $level = 0; + + foreach ($elts[0] as $elt) { + /* + * for "(", ")" remember the nesting level, add "\" + * only to the non-"(?" ones. + */ + + switch($elt) { + case '(': + $pattern .= '\('; + break; + case ')': + if ($level > 0) + $level--; /* closing (? */ + else + $pattern .= '\\'; + $pattern .= ')'; + break; + case '(?': + $level++; + $pattern .= '(?'; + break; + default: + if (substr($elt, 0, 1) == '\\') + $pattern .= $elt; + else + $pattern .= str_replace('/', '\/', $elt); + } + } + $this->_patterns[$i] = "($pattern)"; + } + $this->_regex = "/" . implode("|", $this->_patterns) . "/" . $this->_getPerlMatchingFlags(); + } + return $this->_regex; + } + + /** + * Accessor for perl regex mode flags to use. + * @return string Perl regex flags. + * @access private + */ + function _getPerlMatchingFlags() { + return ($this->_case ? "msS" : "msSi"); + } +} + +/** + * States for a stack machine. 
+ * @package Lexer + * @subpackage Lexer + */ +class Doku_LexerStateStack { + var $_stack; + + /** + * Constructor. Starts in named state. + * @param string $start Starting state name. + * @access public + */ + function Doku_LexerStateStack($start) { + $this->_stack = array($start); + } + + /** + * Accessor for current state. + * @return string State. + * @access public + */ + function getCurrent() { + return $this->_stack[count($this->_stack) - 1]; + } + + /** + * Adds a state to the stack and sets it + * to be the current state. + * @param string $state New state. + * @access public + */ + function enter($state) { + array_push($this->_stack, $state); + } + + /** + * Leaves the current state and reverts + * to the previous one. + * @return boolean False if we drop off + * the bottom of the list. + * @access public + */ + function leave() { + if (count($this->_stack) == 1) { + return false; + } + array_pop($this->_stack); + return true; + } +} + +/** + * Accepts text and breaks it into tokens. + * Some optimisation to make sure the + * content is only scanned by the PHP regex + * parser once. Lexer modes must not start + * with leading underscores. + * @package Doku + * @subpackage Lexer + */ +class Doku_Lexer { + var $_regexes; + var $_parser; + var $_mode; + var $_mode_handlers; + var $_case; + + /** + * Sets up the lexer in case insensitive matching + * by default. + * @param Doku_Parser $parser Handling strategy by + * reference. + * @param string $start Starting handler. + * @param boolean $case True for case sensitive. + * @access public + */ + function Doku_Lexer(&$parser, $start = "accept", $case = false) { + $this->_case = $case; + $this->_regexes = array(); + $this->_parser = &$parser; + $this->_mode = new Doku_LexerStateStack($start); + $this->_mode_handlers = array(); + } + + /** + * Adds a token search pattern for a particular + * parsing mode. The pattern does not change the + * current mode. 
+ * @param string $pattern Perl style regex, but ( and ) + * lose the usual meaning. + * @param string $mode Should only apply this + * pattern when dealing with + * this type of input. + * @access public + */ + function addPattern($pattern, $mode = "accept") { + if (! isset($this->_regexes[$mode])) { + $this->_regexes[$mode] = new Doku_LexerParallelRegex($this->_case); + } + $this->_regexes[$mode]->addPattern($pattern); + } + + /** + * Adds a pattern that will enter a new parsing + * mode. Useful for entering parenthesis, strings, + * tags, etc. + * @param string $pattern Perl style regex, but ( and ) + * lose the usual meaning. + * @param string $mode Should only apply this + * pattern when dealing with + * this type of input. + * @param string $new_mode Change parsing to this new + * nested mode. + * @access public + */ + function addEntryPattern($pattern, $mode, $new_mode) { + if (! isset($this->_regexes[$mode])) { + $this->_regexes[$mode] = new Doku_LexerParallelRegex($this->_case); + } + $this->_regexes[$mode]->addPattern($pattern, $new_mode); + } + + /** + * Adds a pattern that will exit the current mode + * and re-enter the previous one. + * @param string $pattern Perl style regex, but ( and ) + * lose the usual meaning. + * @param string $mode Mode to leave. + * @access public + */ + function addExitPattern($pattern, $mode) { + if (! isset($this->_regexes[$mode])) { + $this->_regexes[$mode] = new Doku_LexerParallelRegex($this->_case); + } + $this->_regexes[$mode]->addPattern($pattern, "__exit"); + } + + /** + * Adds a pattern that has a special mode. Acts as an entry + * and exit pattern in one go, effectively calling a special + * parser handler for this token only. + * @param string $pattern Perl style regex, but ( and ) + * lose the usual meaning. + * @param string $mode Should only apply this + * pattern when dealing with + * this type of input. + * @param string $special Use this mode for this one token. 
+ * @access public + */ + function addSpecialPattern($pattern, $mode, $special) { + if (! isset($this->_regexes[$mode])) { + $this->_regexes[$mode] = new Doku_LexerParallelRegex($this->_case); + } + $this->_regexes[$mode]->addPattern($pattern, "_$special"); + } + + /** + * Adds a mapping from a mode to another handler. + * @param string $mode Mode to be remapped. + * @param string $handler New target handler. + * @access public + */ + function mapHandler($mode, $handler) { + $this->_mode_handlers[$mode] = $handler; + } + + /** + * Splits the page text into tokens. Will fail + * if the handlers report an error or if no + * content is consumed. If successful then each + * unparsed and parsed token invokes a call to the + * held listener. + * @param string $raw Raw HTML text. + * @return boolean True on success, else false. + * @access public + */ + function parse($raw) { + if (! isset($this->_parser)) { + return false; + } + $initialLength = strlen($raw); + $length = $initialLength; + $pos = 0; + while (is_array($parsed = $this->_reduce($raw))) { + list($unmatched, $matched, $mode) = $parsed; + $currentLength = strlen($raw); + $matchPos = $initialLength - $currentLength - strlen($matched); + if (! $this->_dispatchTokens($unmatched, $matched, $mode, $pos, $matchPos)) { + return false; + } + if ($currentLength == $length) { + return false; + } + $length = $currentLength; + $pos = $initialLength - $currentLength; + } + if (!$parsed) { + return false; + } + return $this->_invokeParser($raw, DOKU_LEXER_UNMATCHED, $pos); + } + + /** + * Sends the matched token and any leading unmatched + * text to the parser changing the lexer to a new + * mode if one is listed. + * @param string $unmatched Unmatched leading portion. + * @param string $matched Actual token match. + * @param string $mode Mode after match. A boolean + * false mode causes no change. 
+ * @param int $initialPos Byte index in the raw doc of the unmatched text; + * $matchPos is the byte index of the matched token + * @return boolean False if there was any error + * from the parser. + * @access private + */ + function _dispatchTokens($unmatched, $matched, $mode, $initialPos, $matchPos) { + if (! $this->_invokeParser($unmatched, DOKU_LEXER_UNMATCHED, $initialPos) ){ + return false; + } + if ($this->_isModeEnd($mode)) { + if (! $this->_invokeParser($matched, DOKU_LEXER_EXIT, $matchPos)) { + return false; + } + return $this->_mode->leave(); + } + if ($this->_isSpecialMode($mode)) { + $this->_mode->enter($this->_decodeSpecial($mode)); + if (! $this->_invokeParser($matched, DOKU_LEXER_SPECIAL, $matchPos)) { + return false; + } + return $this->_mode->leave(); + } + if (is_string($mode)) { + $this->_mode->enter($mode); + return $this->_invokeParser($matched, DOKU_LEXER_ENTER, $matchPos); + } + return $this->_invokeParser($matched, DOKU_LEXER_MATCHED, $matchPos); + } + + /** + * Tests to see if the new mode is actually to leave + * the current mode and pop an item from the matching + * mode stack. + * @param string $mode Mode to test. + * @return boolean True if this is the exit mode. + * @access private + */ + function _isModeEnd($mode) { + return ($mode === "__exit"); + } + + /** + * Test to see if the mode is one where this mode + * is entered for this token only and automatically + * leaves immediately afterwards. + * @param string $mode Mode to test. + * @return boolean True if this is a single token (special) mode. + * @access private + */ + function _isSpecialMode($mode) { + return (strncmp($mode, "_", 1) == 0); + } + + /** + * Strips the magic underscore marking single token + * modes. + * @param string $mode Mode to decode. + * @return string Underlying mode name. + * @access private + */ + function _decodeSpecial($mode) { + return substr($mode, 1); + } + + /** + * Calls the parser method named after the current + * mode. Empty content will be ignored. 
The lexer + * has a parser handler for each mode in the lexer. + * @param string $content Text parsed. + * @param boolean $is_match Token is recognised rather + * than unparsed data. + * @param int $pos Current byte index location in raw doc + * thats being parsed + * @access private + */ + function _invokeParser($content, $is_match, $pos) { + if (($content === "") || ($content === false)) { + return true; + } + $handler = $this->_mode->getCurrent(); + if (isset($this->_mode_handlers[$handler])) { + $handler = $this->_mode_handlers[$handler]; + } + + // modes starting with plugin_ are all handled by the same + // handler but with an additional parameter + if(substr($handler,0,7)=='plugin_'){ + list($handler,$plugin) = explode('_',$handler,2); + return $this->_parser->$handler($content, $is_match, $pos, $plugin); + } + + return $this->_parser->$handler($content, $is_match, $pos); + } + + /** + * Tries to match a chunk of text and if successful + * removes the recognised chunk and any leading + * unparsed data. Empty strings will not be matched. + * @param string $raw The subject to parse. This is the + * content that will be eaten. + * @return array Three item list of unparsed + * content followed by the + * recognised token and finally the + * action the parser is to take. + * True if no match, false if there + * is a parsing error. + * @access private + */ + function _reduce(&$raw) { + if (! 
isset($this->_regexes[$this->_mode->getCurrent()])) { + return false; + } + if ($raw === "") { + return true; + } + if ($action = $this->_regexes[$this->_mode->getCurrent()]->split($raw, $split)) { + list($unparsed, $match, $raw) = $split; + return array($unparsed, $match, $action); + } + return true; + } +} + +/** +* Escapes regex characters other than (, ) and / +* @TODO +*/ +function Doku_Lexer_Escape($str) { + //$str = addslashes($str); + $chars = array( + '/\\\\/', + '/\./', + '/\+/', + '/\*/', + '/\?/', + '/\[/', + '/\^/', + '/\]/', + '/\$/', + '/\{/', + '/\}/', + '/\=/', + '/\!/', + '/\</', + '/\>/', + '/\|/', + '/\:/' + ); + + $escaped = array( + '\\\\\\\\', + '\.', + '\+', + '\*', + '\?', + '\[', + '\^', + '\]', + '\$', + '\{', + '\}', + '\=', + '\!', + '\<', + '\>', + '\|', + '\:' + ); + return preg_replace($chars, $escaped, $str); +} + +//Setup VIM: ex: et ts=4 sw=4 enc=utf-8 : diff --git a/lib/dokuwiki/inc/parser/metadata.php b/lib/dokuwiki/inc/parser/metadata.php new file mode 100644 index 000000000..fc60e5774 --- /dev/null +++ b/lib/dokuwiki/inc/parser/metadata.php @@ -0,0 +1,483 @@ +<?php +/** + * Renderer for metadata + * + * @author Esther Brunner <wikidesign@gmail.com> + */ +if(!defined('DOKU_INC')) die('meh.'); + +if ( !defined('DOKU_LF') ) { + // Some whitespace to help View > Source + define ('DOKU_LF',"\n"); +} + +if ( !defined('DOKU_TAB') ) { + // Some whitespace to help View > Source + define ('DOKU_TAB',"\t"); +} + +require_once DOKU_INC . 
'inc/parser/renderer.php';

/**
 * Metadata renderer
 *
 * Walks the render instructions of a page and, instead of producing markup,
 * fills $meta with the page title, the table of contents, a plain text
 * abstract, the referenced pages, the first image and RSS feed information.
 *
 * The abstract is accumulated in $doc while $capture is true. Capturing is
 * switched off at the end of a block element once $doc is longer than 250
 * bytes; document_end() finally limits the abstract to 500 characters.
 */
class Doku_Renderer_metadata extends Doku_Renderer {

    var $doc  = '';             // plain text collected for the abstract
    var $meta = array();        // metadata collected during this run
    var $persistent = array();  // values copied into $meta on document_start()

    var $headers = array();     // headline ids handed to sectionID()
    var $capture = true;        // still collecting text for the abstract?
    var $store   = '';          // holds $doc while inside a footnote
    var $firstimage = '';       // first jpg/gif/png referenced by the page

    /**
     * Return the format of the renderer
     *
     * @returns string 'metadata'
     */
    function getFormat(){
        return 'metadata';
    }

    /**
     * Reset the header list and seed $meta from the persistent values
     */
    function document_start(){
        global $ID;

        $this->headers = array();

        // external pages are missing create date
        // (empty() also covers the key not being set at all)
        if(empty($this->persistent['date']['created'])){
            $this->persistent['date']['created'] = filectime(wikiFN($ID));
        }
        if(!isset($this->persistent['creator'])){
            $this->persistent['creator'] = '';
        }
        // reset metadata to persistent values
        $this->meta = $this->persistent;
    }

    /**
     * Store render info, abstract, first image and modification date
     */
    function document_end(){
        global $ID;

        // store internal info in metadata (notoc,nocache)
        $this->meta['internal'] = $this->info;

        if (!isset($this->meta['description']['abstract'])){
            // cut off too long abstracts
            $this->doc = trim($this->doc);
            if (strlen($this->doc) > 500)
                $this->doc = utf8_substr($this->doc, 0, 500).'…';
            $this->meta['description']['abstract'] = $this->doc;
        }

        $this->meta['relation']['firstimage'] = $this->firstimage;

        if(!isset($this->meta['date']['modified'])){
            $this->meta['date']['modified'] = filemtime(wikiFN($ID));
        }

    }

    /**
     * Add a headline to the table of contents stored in the metadata
     *
     * @param string $id    section id of the headline
     * @param string $text  headline text
     * @param int    $level headline level
     */
    function toc_additem($id, $text, $level) {
        global $conf;

        //only add items within configured levels
        if($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']){
            // the TOC is one of our standard ul list arrays ;-)
            $this->meta['description']['tableofcontents'][] = array(
              'hid'   => $id,
              'title' => $text,
              'type'  => 'ul',
              'level' => $level-$conf['toptoclevel']+1
            );
        }

    }

    /**
     * Record a headline: the first one becomes the title, every one goes to
     * the TOC, and headlines below level 1 are added to the abstract
     */
    function header($text, $level, $pos) {
        if (!isset($this->meta['title'])) $this->meta['title'] = $text;

        // add the header to the TOC
        $hid = $this->_headerToLink($text, true);
        $this->toc_additem($hid, $text, $level);

        // add to summary
        if ($this->capture && ($level > 1)) $this->doc .= DOKU_LF.$text.DOKU_LF;
    }

    function section_open($level){}
    function section_close(){}

    function cdata($text){
        if ($this->capture) $this->doc .= $text;
    }

    function p_open(){
        if ($this->capture) $this->doc .= DOKU_LF;
    }

    // end of a paragraph: stop capturing once the abstract is long enough
    function p_close(){
        if ($this->capture){
            if (strlen($this->doc) > 250) $this->capture = false;
            else $this->doc .= DOKU_LF;
        }
    }

    function linebreak(){
        if ($this->capture) $this->doc .= DOKU_LF;
    }

    function hr(){
        if ($this->capture){
            if (strlen($this->doc) > 250) $this->capture = false;
            else $this->doc .= DOKU_LF.'----------'.DOKU_LF;
        }
    }

    // inline formatting leaves no trace in the abstract
    function strong_open(){}
    function strong_close(){}

    function emphasis_open(){}
    function emphasis_close(){}

    function underline_open(){}
    function underline_close(){}

    function monospace_open(){}
    function monospace_close(){}

    function subscript_open(){}
    function subscript_close(){}

    function superscript_open(){}
    function superscript_close(){}

    function deleted_open(){}
    function deleted_close(){}

    /**
     * Callback for footnote start syntax
     *
     * The text captured so far is parked in $store and $doc is cleared, so
     * that whatever is rendered inside the footnote ends up in a scratch $doc.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    function footnote_open() {
        if ($this->capture){
            // move current content to store and record footnote
            $this->store = $this->doc;
            $this->doc   = '';
        }
    }

    /**
     * Callback for footnote end syntax
     *
     * The text collected inside the footnote is dropped and the previous
     * content is restored from $store again
     *
     * @author Andreas Gohr
     */
    function footnote_close() {
        if ($this->capture){
            // restore old content
            $this->doc = $this->store;
            $this->store = '';
        }
    }

    function listu_open(){
        if ($this->capture) $this->doc .= DOKU_LF;
    }

    function listu_close(){
        if ($this->capture && (strlen($this->doc) > 250)) $this->capture = false;
    }

    function listo_open(){
        if ($this->capture) $this->doc .= DOKU_LF;
    }

    function listo_close(){
        if ($this->capture && (strlen($this->doc) > 250)) $this->capture = false;
    }

    // list items are written as "* " indented by one tab per level
    function listitem_open($level){
        if ($this->capture) $this->doc .= str_repeat(DOKU_TAB, $level).'* ';
    }

    function listitem_close(){
        if ($this->capture) $this->doc .= DOKU_LF;
    }

    function listcontent_open(){}
    function listcontent_close(){}

    function unformatted($text){
        if ($this->capture) $this->doc .= $text;
    }

    // embedded PHP and HTML never contribute to the abstract
    function php($text){}

    function phpblock($text){}

    function html($text){}

    function htmlblock($text){}

    function preformatted($text){
        if ($this->capture) $this->doc .= $text;
    }

    function file($text, $lang = null, $file = null){
        if ($this->capture){
            $this->doc .= DOKU_LF.$text;
            if (strlen($this->doc) > 250) $this->capture = false;
            else $this->doc .= DOKU_LF;
        }
    }

    function quote_open(){
        if ($this->capture) $this->doc .= DOKU_LF.DOKU_TAB.'"';
    }

    function quote_close(){
        if ($this->capture){
            $this->doc .= '"';
            if (strlen($this->doc) > 250) $this->capture = false;
            else $this->doc .= DOKU_LF;
        }
    }

    function code($text, $language = NULL, $file = null){
        if ($this->capture){
            $this->doc .= DOKU_LF.$text;
            if (strlen($this->doc) > 250) $this->capture = false;
            else $this->doc .= DOKU_LF;
        }
    }

    function acronym($acronym){
        if ($this->capture) $this->doc .= $acronym;
    }

    function smiley($smiley){
        if ($this->capture) $this->doc .= $smiley;
    }

    function entity($entity){
        if ($this->capture) $this->doc .= $entity;
    }

    function multiplyentity($x, $y){
        if ($this->capture) $this->doc .= $x.'×'.$y;
    }

    // typographic quotes are taken from the language strings
    function singlequoteopening(){
        global $lang;
        if ($this->capture) $this->doc .= $lang['singlequoteopening'];
    }

    function singlequoteclosing(){
        global $lang;
        if ($this->capture) $this->doc .= $lang['singlequoteclosing'];
    }

    function apostrophe() {
        global $lang;
        if ($this->capture) $this->doc .= $lang['apostrophe'];
    }

    function doublequoteopening(){
        global $lang;
        if ($this->capture) $this->doc .= $lang['doublequoteopening'];
    }

    function doublequoteclosing(){
        global $lang;
        if ($this->capture) $this->doc .= $lang['doublequoteclosing'];
    }

    function camelcaselink($link) {
        $this->internallink($link, $link);
    }

    function locallink($hash, $name = NULL){}

    /**
     * keep track of internal links in $this->meta['relation']['references']
     *
     * @param string       $id   link target, may carry a #hash
     * @param string|array $name link title; an array describes a media title
     */
    function internallink($id, $name = NULL){
        global $ID;

        if(is_array($name))
            $this->_firstimage($name['src']);

        $default = $this->_simpleTitle($id);

        // first resolve and clean up the $id
        resolve_pageid(getNS($ID), $id, $exists);
        // only the page part is needed; taking a single element avoids an
        // undefined offset notice when the id has no #hash
        list($page) = explode('#', $id, 2);

        // set metadata
        $this->meta['relation']['references'][$page] = $exists;
        // $data = array('relation' => array('isreferencedby' => array($ID => true)));
        // p_set_metadata($id, $data);

        // add link title to summary
        if ($this->capture){
            $name = $this->_getLinkTitle($name, $default, $id);
            $this->doc .= $name;
        }
    }

    function externallink($url, $name = NULL){
        if(is_array($name))
            $this->_firstimage($name['src']);

        if ($this->capture){
            if ($name) $this->doc .= $name;
            else $this->doc .= '<'.$url.'>';
        }
    }

    function interwikilink($match, $name = NULL, $wikiName, $wikiUri){
        if(is_array($name))
            $this->_firstimage($name['src']);

        if ($this->capture){
            // strip a #hash from the URI (no notice when there is none)
            list($wikiUri) = explode('#', $wikiUri, 2);
            $name = $this->_getLinkTitle($name, $wikiName.'>'.$wikiUri);
            $this->doc .= $name;
        }
    }

    function windowssharelink($url, $name = NULL){
        if(is_array($name))
            $this->_firstimage($name['src']);

        if ($this->capture){
            if ($name) $this->doc .= $name;
            else $this->doc .= '<'.$url.'>';
        }
    }

    function emaillink($address, $name = NULL){
        if(is_array($name))
            $this->_firstimage($name['src']);

        if ($this->capture){
            if ($name) $this->doc .= $name;
            else $this->doc .= '<'.$address.'>';
        }
    }

    // media shows up in the abstract as [title] and may become the first image
    function internalmedia($src, $title=NULL, $align=NULL, $width=NULL,
                           $height=NULL, $cache=NULL, $linking=NULL){
        if ($this->capture && $title) $this->doc .= '['.$title.']';
        $this->_firstimage($src);
    }

    function externalmedia($src, $title=NULL, $align=NULL, $width=NULL,
                           $height=NULL, $cache=NULL, $linking=NULL){
        if ($this->capture && $title) $this->doc .= '['.$title.']';
        $this->_firstimage($src);
    }

    /**
     * Record an included feed and keep the smallest refresh value seen
     * in $this->meta['date']['valid']['age']
     */
    function rss($url,$params) {
        $this->meta['relation']['haspart'][$url] = true;

        $this->meta['date']['valid']['age'] =
            isset($this->meta['date']['valid']['age']) ?
                min($this->meta['date']['valid']['age'],$params['refresh']) :
                $params['refresh'];
    }

    function table_open($maxcols = NULL, $numrows = NULL){}
    function table_close(){}

    function tablerow_open(){}
    function tablerow_close(){}

    function tableheader_open($colspan = 1, $align = NULL, $rowspan = 1){}
    function tableheader_close(){}

    function tablecell_open($colspan = 1, $align = NULL, $rowspan = 1){}
    function tablecell_close(){}

    //----------------------------------------------------------
    // Utils

    /**
     * Removes any Namespace from the given name but keeps
     * casing and special chars
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    function _simpleTitle($name){
        global $conf;

        if(is_array($name)) return '';

        if($conf['useslash']){
            $nssep = '[:;/]';
        }else{
            $nssep = '[:;]';
        }
        $name = preg_replace('!.*'.$nssep.'!','',$name);
        //if there is a hash we use the anchor name only
        $name = preg_replace('!.*#!','',$name);
        return $name;
    }

    /**
     * Creates a linkid from a headline
     *
     * @param string  $title   The headline title
     * @param boolean $create  Create a new unique ID?
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    function _headerToLink($title, $create=false) {
        if($create){
            return sectionID($title,$this->headers);
        }else{
            $check = false;
            return sectionID($title,$check);
        }
    }

    /**
     * Construct a title and handle images in titles
     *
     * @param string|array|null $title   given title; array for image titles
     * @param string            $default used when no title was given
     * @param string            $id      page id to look up a first heading for
     * @author Harry Fuecks <hfuecks@gmail.com>
     */
    function _getLinkTitle($title, $default, $id=NULL) {
        if (is_null($title)){
            if (useHeading('content') && $id){
                $heading = p_get_first_heading($id,false);
                if ($heading) return $heading;
            }
            return $default;
        } else if (is_string($title)){
            return $title;
        } else if (is_array($title)){
            return '['.$title['title'].']';
        }
    }

    /**
     * Remember the first jpg/gif/png image referenced by the page
     *
     * Later calls are ignored once an image has been stored. Sources not
     * starting with http:// or https:// are resolved as media ids first.
     *
     * @param string $src media id or URL, may carry a #hash
     */
    function _firstimage($src){
        if($this->firstimage) return;
        global $ID;

        // drop a #hash if present (no notice when there is none)
        list($src) = explode('#',$src,2);
        if(!preg_match('/^https?:\/\//i',$src)){
            resolve_mediaid(getNS($ID),$src, $exists);
        }
        // the dot is escaped so that only a real file extension matches
        if(preg_match('/\.(jpe?g|gif|png)$/i',$src)){
            $this->firstimage = $src;
        }
    }
}

//Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/lib/dokuwiki/inc/parser/parser.php b/lib/dokuwiki/inc/parser/parser.php new file mode 100644 index 000000000..a78b08a29 --- /dev/null +++ b/lib/dokuwiki/inc/parser/parser.php @@ -0,0 +1,956 @@
<?php
if(!defined('DOKU_INC')) die('meh.');
require_once DOKU_INC . 'inc/parser/lexer.php';
require_once DOKU_INC . 
'inc/parser/handler.php';


/**
 * Define various types of modes used by the parser - they are used to
 * populate the list of modes another mode accepts
 */
global $PARSER_MODES;
$PARSER_MODES = array(
    // containers are complex modes that can contain many other modes
    // hr breaks the principle but they shouldn't be used in tables / lists
    // so they are put here
    'container'    => array('listblock','table','quote','hr'),

    // some mode are allowed inside the base mode only
    'baseonly'     => array('header'),

    // modes for styling text -- footnote behaves similar to styling
    'formatting'   => array('strong', 'emphasis', 'underline', 'monospace',
                            'subscript', 'superscript', 'deleted', 'footnote'),

    // modes where the token is simply replaced - they can not contain any
    // other modes
    'substition'   => array('acronym','smiley','wordblock','entity',
                            'camelcaselink', 'internallink','media',
                            'externallink','linebreak','emaillink',
                            'windowssharelink','filelink','notoc',
                            'nocache','multiplyentity','quotes','rss'),

    // modes which have a start and end token but inside which
    // no other modes should be applied
    'protected'    => array('preformatted','code','file','php','html','htmlblock','phpblock'),

    // inside this mode no wiki markup should be applied but lineendings
    // and whitespace isn't preserved
    'disabled'     => array('unformatted'),

    // used to mark paragraph boundaries
    'paragraphs'   => array('eol')
);

//-------------------------------------------------------------------

/**
 * Sets up the Lexer with modes and points it to the Handler
 * For an intro to the Lexer see: wiki:parser
 */
class Doku_Parser {

    var $Handler;

    var $Lexer;

    var $modes = array();

    var $connected = false;

    /**
     * Register the base mode and create the Lexer if there is none yet
     *
     * @param Doku_Parser_Mode $BaseMode mode registered under the name 'base'
     */
    function addBaseMode(& $BaseMode) {
        $this->modes['base'] = & $BaseMode;
        if ( !$this->Lexer ) {
            // plain assignment: "= & new" is deprecated since PHP 5.3 and
            // no longer parses on PHP 7
            $this->Lexer = new Doku_Lexer($this->Handler,'base', true);
        }
        $this->modes['base']->Lexer = & $this->Lexer;
    }

    /**
     * Register a mode under the given name and hand it the Lexer
     *
     * PHP preserves order of associative elements
     * Mode sequence is important
     *
     * @param string           $name name the mode is registered under
     * @param Doku_Parser_Mode $Mode the mode object
     */
    function addMode($name, & $Mode) {
        if ( !isset($this->modes['base']) ) {
            // addBaseMode() takes its argument by reference, so give it a
            // real variable instead of a bare "new" expression
            $BaseMode = new Doku_Parser_Mode_base();
            $this->addBaseMode($BaseMode);
        }
        $Mode->Lexer = & $this->Lexer;
        $this->modes[$name] = & $Mode;
    }

    /**
     * Let every registered mode connect itself to all modes accepting it
     *
     * Runs only once; later calls return immediately.
     */
    function connectModes() {

        if ( $this->connected ) {
            return;
        }

        foreach ( array_keys($this->modes) as $mode ) {

            // Base isn't connected to anything
            if ( $mode == 'base' ) {
                continue;
            }

            $this->modes[$mode]->preConnect();

            foreach ( array_keys($this->modes) as $cm ) {

                if ( $this->modes[$cm]->accepts($mode) ) {
                    $this->modes[$mode]->connectTo($cm);
                }

            }

            $this->modes[$mode]->postConnect();
        }

        $this->connected = true;
    }

    /**
     * Run the Lexer over the document
     *
     * @param string $doc the text to parse
     * @return array|false the Handler's calls, false when no Lexer exists
     */
    function parse($doc) {
        if ( $this->Lexer ) {
            $this->connectModes();
            // Normalize CRs and pad doc
            $doc = "\n".str_replace("\r\n","\n",$doc)."\n";
            $this->Lexer->parse($doc);
            $this->Handler->_finalize();
            return $this->Handler->calls;
        } else {
            return false;
        }
    }

}

//-------------------------------------------------------------------
/**
 * This class and all the subclasses below are
 * used to reduce the effort required to register
 * modes with the Lexer. For performance these
 * could all be eliminated later perhaps, or
 * the Parser could be serialized to a file once
 * all modes are registered
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 */
class Doku_Parser_Mode {

    var $Lexer;

    var $allowedModes = array();

    // returns a number used to determine in which order modes are added
    function getSort() {
        trigger_error('getSort() not implemented in '.get_class($this), E_USER_WARNING);
    }

    // Called before any calls to connectTo
    function preConnect() {}

    // Connects the mode
    function connectTo($mode) {}

    // Called after all calls to connectTo
    function postConnect() {}

    // Is the named mode allowed inside this one?
    function accepts($mode) {
        return in_array($mode, (array) $this->allowedModes );
    }

}

//-------------------------------------------------------------------
class Doku_Parser_Mode_base extends Doku_Parser_Mode {

    // the base mode accepts every mode category
    function Doku_Parser_Mode_base() {
        global $PARSER_MODES;

        $this->allowedModes = array_merge (
                $PARSER_MODES['container'],
                $PARSER_MODES['baseonly'],
                $PARSER_MODES['paragraphs'],
                $PARSER_MODES['formatting'],
                $PARSER_MODES['substition'],
                $PARSER_MODES['protected'],
                $PARSER_MODES['disabled']
            );
    }

    function getSort() {
        return 0;
    }
}

//-------------------------------------------------------------------
class Doku_Parser_Mode_footnote extends Doku_Parser_Mode {

    function Doku_Parser_Mode_footnote() {
        global $PARSER_MODES;

        $this->allowedModes = array_merge (
                $PARSER_MODES['container'],
                $PARSER_MODES['formatting'],
                $PARSER_MODES['substition'],
                $PARSER_MODES['protected'],
                $PARSER_MODES['disabled']
            );

        // footnotes may not be nested in themselves; only unset when the
        // entry was found, array_search() returning false would otherwise
        // remove element 0
        $key = array_search('footnote', $this->allowedModes);
        if ( is_int($key) ) {
            unset($this->allowedModes[$key]);
        }
    }

    function connectTo($mode) {
        $this->Lexer->addEntryPattern(
            '\x28\x28(?=.*\x29\x29)',$mode,'footnote'
            );
    }

    function postConnect() {
        $this->Lexer->addExitPattern(
            '\x29\x29','footnote'
            );
    }

    function getSort() {
        return 150;
    }
}

//-------------------------------------------------------------------
/**
 * Headlines: registered once, for the base mode only
 */
class Doku_Parser_Mode_header extends Doku_Parser_Mode {

    function preConnect() {
        // at least two '=' on either side; the closing run is not
        // required to be as long as the opening one
        $headline = '[ \t]*={2,}[^\n]+={2,}[ \t]*(?=\n)';
        $this->Lexer->addSpecialPattern($headline, 'base', 'header');
    }

    function getSort() {
        return 50;
    }
}

//-------------------------------------------------------------------
/**
 * The ~~NOTOC~~ macro
 */
class Doku_Parser_Mode_notoc extends Doku_Parser_Mode {

    function connectTo($mode) {
        $macro = '~~NOTOC~~';
        $this->Lexer->addSpecialPattern($macro, $mode, 'notoc');
    }

    function getSort() {
        return 30;
    }
}

//-------------------------------------------------------------------
/**
 * The ~~NOCACHE~~ macro
 */
class Doku_Parser_Mode_nocache extends Doku_Parser_Mode {

    function connectTo($mode) {
        $macro = '~~NOCACHE~~';
        $this->Lexer->addSpecialPattern($macro, $mode, 'nocache');
    }

    function getSort() {
        return 40;
    }
}

//-------------------------------------------------------------------
/**
 * Forced line break: two backslashes followed by a blank, a tab
 * or the end of the line
 */
class Doku_Parser_Mode_linebreak extends Doku_Parser_Mode {

    function connectTo($mode) {
        $break = '\x5C{2}(?:[ \t]|(?=\n))';
        $this->Lexer->addSpecialPattern($break, $mode, 'linebreak');
    }

    function getSort() {
        return 140;
    }
}

//-------------------------------------------------------------------
/**
 * Line endings, registered for every mode except lists and tables
 */
class Doku_Parser_Mode_eol extends Doku_Parser_Mode {

    function connectTo($mode) {
        // never connect to list blocks or tables
        if ( $mode == 'listblock' || $mode == 'table' ) {
            return;
        }

        // see FS#1652, pattern extended to swallow preceding whitespace to
        // avoid issues with lines that only contain whitespace
        $lineEnd = '(?:^[ \t]*)?\n';
        $this->Lexer->addSpecialPattern($lineEnd, $mode, 'eol');
    }

    function getSort() {
        return 370;
    }
}

//-------------------------------------------------------------------
/**
 * Horizontal rule: a line made of four or more dashes
 */
class Doku_Parser_Mode_hr extends Doku_Parser_Mode {

    function connectTo($mode) {
        $rule = '\n[ \t]*-{4,}[ \t]*(?=\n)';
        $this->Lexer->addSpecialPattern($rule, $mode, 'hr');
    }

    function getSort() {
        return 160;
    }
}

+//------------------------------------------------------------------- +class Doku_Parser_Mode_formatting extends Doku_Parser_Mode { + var $type; + + var $formatting = array ( + 'strong' => array ( + 'entry'=>'\*\*(?=.*\*\*)', + 'exit'=>'\*\*', + 'sort'=>70 + ), + + 'emphasis'=> array ( + 'entry'=>'//(?=[^\x00]*[^:])', //hack for bugs #384 #763 #1468 + 'exit'=>'//', + 'sort'=>80 + ), + + 'underline'=> array ( + 'entry'=>'__(?=.*__)', + 'exit'=>'__', + 'sort'=>90 + ), + + 'monospace'=> array ( + 'entry'=>'\x27\x27(?=.*\x27\x27)', + 'exit'=>'\x27\x27', + 'sort'=>100 + ), + + 'subscript'=> array ( + 'entry'=>'<sub>(?=.*</sub>)', + 'exit'=>'</sub>', + 'sort'=>110 + ), + + 'superscript'=> array ( + 'entry'=>'<sup>(?=.*</sup>)', + 'exit'=>'</sup>', + 'sort'=>120 + ), + + 'deleted'=> array ( + 'entry'=>'<del>(?=.*</del>)', + 'exit'=>'</del>', + 'sort'=>130 + ), + ); + + function Doku_Parser_Mode_formatting($type) { + global $PARSER_MODES; + + if ( !array_key_exists($type, $this->formatting) ) { + trigger_error('Invalid formatting type '.$type, E_USER_WARNING); + } + + $this->type = $type; + + // formatting may contain other formatting but not it self + $modes = $PARSER_MODES['formatting']; + $key = array_search($type, $modes); + if ( is_int($key) ) { + unset($modes[$key]); + } + + $this->allowedModes = array_merge ( + $modes, + $PARSER_MODES['substition'], + $PARSER_MODES['disabled'] + ); + } + + function connectTo($mode) { + + // Can't nest formatting in itself + if ( $mode == $this->type ) { + return; + } + + $this->Lexer->addEntryPattern( + $this->formatting[$this->type]['entry'], + $mode, + $this->type + ); + } + + function postConnect() { + + $this->Lexer->addExitPattern( + $this->formatting[$this->type]['exit'], + $this->type + ); + + } + + function getSort() { + return $this->formatting[$this->type]['sort']; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_listblock extends Doku_Parser_Mode { + + function 
Doku_Parser_Mode_listblock() { + global $PARSER_MODES; + + $this->allowedModes = array_merge ( + $PARSER_MODES['formatting'], + $PARSER_MODES['substition'], + $PARSER_MODES['disabled'], + $PARSER_MODES['protected'] #XXX new + ); + + // $this->allowedModes[] = 'footnote'; + } + + function connectTo($mode) { + $this->Lexer->addEntryPattern('\n {2,}[\-\*]',$mode,'listblock'); + $this->Lexer->addEntryPattern('\n\t{1,}[\-\*]',$mode,'listblock'); + + $this->Lexer->addPattern('\n {2,}[\-\*]','listblock'); + $this->Lexer->addPattern('\n\t{1,}[\-\*]','listblock'); + + } + + function postConnect() { + $this->Lexer->addExitPattern('\n','listblock'); + } + + function getSort() { + return 10; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_table extends Doku_Parser_Mode { + + function Doku_Parser_Mode_table() { + global $PARSER_MODES; + + $this->allowedModes = array_merge ( + $PARSER_MODES['formatting'], + $PARSER_MODES['substition'], + $PARSER_MODES['disabled'], + $PARSER_MODES['protected'] + ); + } + + function connectTo($mode) { + $this->Lexer->addEntryPattern('\n\^',$mode,'table'); + $this->Lexer->addEntryPattern('\n\|',$mode,'table'); + } + + function postConnect() { + $this->Lexer->addPattern('\n\^','table'); + $this->Lexer->addPattern('\n\|','table'); + $this->Lexer->addPattern('[\t ]*:::[\t ]*(?=[\|\^])','table'); + $this->Lexer->addPattern('[\t ]+','table'); + $this->Lexer->addPattern('\^','table'); + $this->Lexer->addPattern('\|','table'); + $this->Lexer->addExitPattern('\n','table'); + } + + function getSort() { + return 60; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_unformatted extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addEntryPattern('<nowiki>(?=.*</nowiki>)',$mode,'unformatted'); + $this->Lexer->addEntryPattern('%%(?=.*%%)',$mode,'unformattedalt'); + } + + function postConnect() { + 
$this->Lexer->addExitPattern('</nowiki>','unformatted'); + $this->Lexer->addExitPattern('%%','unformattedalt'); + $this->Lexer->mapHandler('unformattedalt','unformatted'); + } + + function getSort() { + return 170; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_php extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addEntryPattern('<php>(?=.*</php>)',$mode,'php'); + $this->Lexer->addEntryPattern('<PHP>(?=.*</PHP>)',$mode,'phpblock'); + } + + function postConnect() { + $this->Lexer->addExitPattern('</php>','php'); + $this->Lexer->addExitPattern('</PHP>','phpblock'); + } + + function getSort() { + return 180; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_html extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addEntryPattern('<html>(?=.*</html>)',$mode,'html'); + $this->Lexer->addEntryPattern('<HTML>(?=.*</HTML>)',$mode,'htmlblock'); + } + + function postConnect() { + $this->Lexer->addExitPattern('</html>','html'); + $this->Lexer->addExitPattern('</HTML>','htmlblock'); + } + + function getSort() { + return 190; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_preformatted extends Doku_Parser_Mode { + + function connectTo($mode) { + // Has hard coded awareness of lists... + $this->Lexer->addEntryPattern('\n (?![\*\-])',$mode,'preformatted'); + $this->Lexer->addEntryPattern('\n\t(?![\*\-])',$mode,'preformatted'); + + // How to effect a sub pattern with the Lexer! 
+ $this->Lexer->addPattern('\n ','preformatted'); + $this->Lexer->addPattern('\n\t','preformatted'); + + } + + function postConnect() { + $this->Lexer->addExitPattern('\n','preformatted'); + } + + function getSort() { + return 20; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_code extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addEntryPattern('<code(?=.*</code>)',$mode,'code'); + } + + function postConnect() { + $this->Lexer->addExitPattern('</code>','code'); + } + + function getSort() { + return 200; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_file extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addEntryPattern('<file(?=.*</file>)',$mode,'file'); + } + + function postConnect() { + $this->Lexer->addExitPattern('</file>','file'); + } + + function getSort() { + return 210; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_quote extends Doku_Parser_Mode { + + function Doku_Parser_Mode_quote() { + global $PARSER_MODES; + + $this->allowedModes = array_merge ( + $PARSER_MODES['formatting'], + $PARSER_MODES['substition'], + $PARSER_MODES['disabled'], + $PARSER_MODES['protected'] #XXX new + ); + #$this->allowedModes[] = 'footnote'; + #$this->allowedModes[] = 'preformatted'; + #$this->allowedModes[] = 'unformatted'; + } + + function connectTo($mode) { + $this->Lexer->addEntryPattern('\n>{1,}',$mode,'quote'); + } + + function postConnect() { + $this->Lexer->addPattern('\n>{1,}','quote'); + $this->Lexer->addExitPattern('\n','quote'); + } + + function getSort() { + return 220; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_acronym extends Doku_Parser_Mode { + // A list + var $acronyms = array(); + var $pattern = ''; + + function Doku_Parser_Mode_acronym($acronyms) { + 
usort($acronyms,array($this,'_compare')); + $this->acronyms = $acronyms; + } + + function preConnect() { + if(!count($this->acronyms)) return; + + $bound = '[\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]'; + $acronyms = array_map('Doku_Lexer_Escape',$this->acronyms); + $this->pattern = '(?<=^|'.$bound.')(?:'.join('|',$acronyms).')(?='.$bound.')'; + } + + function connectTo($mode) { + if(!count($this->acronyms)) return; + + if ( strlen($this->pattern) > 0 ) { + $this->Lexer->addSpecialPattern($this->pattern,$mode,'acronym'); + } + } + + function getSort() { + return 240; + } + + /** + * sort callback to order by string length descending + */ + function _compare($a,$b) { + $a_len = strlen($a); + $b_len = strlen($b); + if ($a_len > $b_len) { + return -1; + } else if ($a_len < $b_len) { + return 1; + } + + return 0; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_smiley extends Doku_Parser_Mode { + // A list + var $smileys = array(); + var $pattern = ''; + + function Doku_Parser_Mode_smiley($smileys) { + $this->smileys = $smileys; + } + + function preConnect() { + if(!count($this->smileys) || $this->pattern != '') return; + + $sep = ''; + foreach ( $this->smileys as $smiley ) { + $this->pattern .= $sep.'(?<=\W|^)'.Doku_Lexer_Escape($smiley).'(?=\W|$)'; + $sep = '|'; + } + } + + function connectTo($mode) { + if(!count($this->smileys)) return; + + if ( strlen($this->pattern) > 0 ) { + $this->Lexer->addSpecialPattern($this->pattern,$mode,'smiley'); + } + } + + function getSort() { + return 230; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_wordblock extends Doku_Parser_Mode { + // A list + var $badwords = array(); + var $pattern = ''; + + function Doku_Parser_Mode_wordblock($badwords) { + $this->badwords = $badwords; + } + + function preConnect() { + + if ( count($this->badwords) == 0 || $this->pattern != '') { + return; + } + + $sep = ''; + foreach ( $this->badwords 
as $badword ) { + $this->pattern .= $sep.'(?<=\b)(?i)'.Doku_Lexer_Escape($badword).'(?-i)(?=\b)'; + $sep = '|'; + } + + } + + function connectTo($mode) { + if ( strlen($this->pattern) > 0 ) { + $this->Lexer->addSpecialPattern($this->pattern,$mode,'wordblock'); + } + } + + function getSort() { + return 250; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_entity extends Doku_Parser_Mode { + // A list + var $entities = array(); + var $pattern = ''; + + function Doku_Parser_Mode_entity($entities) { + $this->entities = $entities; + } + + function preConnect() { + if(!count($this->entities) || $this->pattern != '') return; + + $sep = ''; + foreach ( $this->entities as $entity ) { + $this->pattern .= $sep.Doku_Lexer_Escape($entity); + $sep = '|'; + } + } + + function connectTo($mode) { + if(!count($this->entities)) return; + + if ( strlen($this->pattern) > 0 ) { + $this->Lexer->addSpecialPattern($this->pattern,$mode,'entity'); + } + } + + function getSort() { + return 260; + } +} + +//------------------------------------------------------------------- +// Implements the 640x480 replacement +class Doku_Parser_Mode_multiplyentity extends Doku_Parser_Mode { + + function connectTo($mode) { + + $this->Lexer->addSpecialPattern( + '(?<=\b)(?:[1-9]|\d{2,})[xX]\d+(?=\b)',$mode,'multiplyentity' + ); + + } + + function getSort() { + return 270; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_quotes extends Doku_Parser_Mode { + + function connectTo($mode) { + global $conf; + + $ws = '\s/\#~:+=&%@\-\x28\x29\]\[{}><"\''; // whitespace + $punc = ';,\.?!'; + + if($conf['typography'] == 2){ + $this->Lexer->addSpecialPattern( + "(?<=^|[$ws])'(?=[^$ws$punc])",$mode,'singlequoteopening' + ); + $this->Lexer->addSpecialPattern( + "(?<=^|[^$ws]|[$punc])'(?=$|[$ws$punc])",$mode,'singlequoteclosing' + ); + $this->Lexer->addSpecialPattern( + 
"(?<=^|[^$ws$punc])'(?=$|[^$ws$punc])",$mode,'apostrophe' + ); + } + + $this->Lexer->addSpecialPattern( + "(?<=^|[$ws])\"(?=[^$ws$punc])",$mode,'doublequoteopening' + ); + $this->Lexer->addSpecialPattern( + "\"",$mode,'doublequoteclosing' + ); + + + } + + function getSort() { + return 280; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_camelcaselink extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addSpecialPattern( + '\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b',$mode,'camelcaselink' + ); + } + + function getSort() { + return 290; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_internallink extends Doku_Parser_Mode { + + function connectTo($mode) { + // Word boundaries? + $this->Lexer->addSpecialPattern("\[\[.+?\]\]",$mode,'internallink'); + } + + function getSort() { + return 300; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_media extends Doku_Parser_Mode { + + function connectTo($mode) { + // Word boundaries? 
+ $this->Lexer->addSpecialPattern("\{\{[^\}]+\}\}",$mode,'media'); + } + + function getSort() { + return 320; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_rss extends Doku_Parser_Mode { + + function connectTo($mode) { + $this->Lexer->addSpecialPattern("\{\{rss>[^\}]+\}\}",$mode,'rss'); + } + + function getSort() { + return 310; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_externallink extends Doku_Parser_Mode { + var $schemes = array(); + var $patterns = array(); + + function preConnect() { + if(count($this->patterns)) return; + + $ltrs = '\w'; + $gunk = '/\#~:.?+=&%@!\-'; + $punc = '.:?\-;,'; + $host = $ltrs.$punc; + $any = $ltrs.$gunk.$punc; + + $this->schemes = getSchemes(); + foreach ( $this->schemes as $scheme ) { + $this->patterns[] = '\b(?i)'.$scheme.'(?-i)://['.$any.']+?(?=['.$punc.']*[^'.$any.'])'; + } + + $this->patterns[] = '\b(?i)www?(?-i)\.['.$host.']+?\.['.$host.']+?['.$any.']+?(?=['.$punc.']*[^'.$any.'])'; + $this->patterns[] = '\b(?i)ftp?(?-i)\.['.$host.']+?\.['.$host.']+?['.$any.']+?(?=['.$punc.']*[^'.$any.'])'; + } + + function connectTo($mode) { + + foreach ( $this->patterns as $pattern ) { + $this->Lexer->addSpecialPattern($pattern,$mode,'externallink'); + } + } + + function getSort() { + return 330; + } +} + +//------------------------------------------------------------------- +class Doku_Parser_Mode_filelink extends Doku_Parser_Mode { + + var $pattern; + + function preConnect() { + + $ltrs = '\w'; + $gunk = '/\#~:.?+=&%@!\-'; + $punc = '.:?\-;,'; + $host = $ltrs.$punc; + $any = $ltrs.$gunk.$punc; + + $this->pattern = '\b(?i)file(?-i)://['.$any.']+?['. 
$punc.']*[^'.$any.']';
    }

    function connectTo($mode) {
        $this->Lexer->addSpecialPattern(
            $this->pattern,$mode,'filelink');
    }

    function getSort() {
        return 360;
    }
}

//-------------------------------------------------------------------
/**
 * Parser mode that recognises Windows share links (\\server\share\...)
 * and hands them to the handler as 'windowssharelink'.
 */
class Doku_Parser_Mode_windowssharelink extends Doku_Parser_Mode {

    var $pattern;

    // build the pattern once before the mode is connected
    function preConnect() {
        $this->pattern = "\\\\\\\\\w+?(?:\\\\[\w$]+)+";
    }

    function connectTo($mode) {
        $this->Lexer->addSpecialPattern(
            $this->pattern,$mode,'windowssharelink');
    }

    function getSort() {
        return 350;
    }
}

//-------------------------------------------------------------------
/**
 * Parser mode that recognises <user@example.com> style mail links
 * and hands them to the handler as 'emaillink'.
 */
class Doku_Parser_Mode_emaillink extends Doku_Parser_Mode {

    function connectTo($mode) {
        // pattern below is defined in inc/mail.php
        $this->Lexer->addSpecialPattern('<'.PREG_PATTERN_VALID_EMAIL.'>',$mode,'emaillink');
    }

    function getSort() {
        return 340;
    }
}


//Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/lib/dokuwiki/inc/parser/renderer.php b/lib/dokuwiki/inc/parser/renderer.php new file mode 100644 index 000000000..6082e935d --- /dev/null +++ b/lib/dokuwiki/inc/parser/renderer.php @@ -0,0 +1,322 @@
<?php
/**
 * Renderer output base class
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) die('meh.');
// NOTE(review): the first require_once names this very file; kept as-is
require_once DOKU_INC . 'inc/parser/renderer.php';
require_once DOKU_INC . 'inc/plugin.php';
require_once DOKU_INC . 'inc/pluginutils.php';

/**
 * An empty renderer, produces no output
 *
 * Inherits from DokuWiki_Plugin for giving additional functions to render plugins.
 * Every instruction callback below is a no-op; concrete renderers override
 * the ones they care about.
 */
class Doku_Renderer extends DokuWiki_Plugin {
    var $info = array(
        'cache' => true, // may the rendered result cached?
        'toc'   => true, // render the TOC?
    );

    // keep some config options
    var $acronyms = array();
    var $smileys = array();
    var $badwords = array();
    var $entities = array();
    var $interwiki = array();

    // allows renderer to be used again, clean out any per-use values
    function reset() {
    }

    // mark the rendered result as not cacheable
    function nocache() {
        $this->info['cache'] = false;
    }

    // switch off TOC rendering for this document
    function notoc() {
        $this->info['toc'] = false;
    }

    /**
     * Returns the format produced by this renderer.
     *
     * Has to be overridden by descendant classes; the base implementation
     * only raises a warning and returns nothing.
     */
    function getFormat(){
        trigger_error('getFormat() not implemented in '.get_class($this), E_USER_WARNING);
    }


    //handle plugin rendering
    function plugin($name,$data){
        $plugin =& plugin_load('syntax',$name);
        if($plugin != null){
            $plugin->render($this->getFormat(),$this,$data);
        }
    }

    /**
     * handle nested render instructions
     * this method (and nest_close method) should not be overloaded in actual renderer output classes
     *
     * @param array $instructions list of array(methodname, arguments) pairs
     */
    function nest($instructions) {

        foreach ( $instructions as $instruction ) {
            // execute the callback against ourself
            call_user_func_array(array(&$this, $instruction[0]),$instruction[1]);
        }
    }

    // dummy closing instruction issued by Doku_Handler_Nest, normally the syntax mode should
    // override this instruction when instantiating Doku_Handler_Nest - however plugins will not
    // be able to - as their instructions require data.
    function nest_close() {}

    function document_start() {}

    function document_end() {}

    function render_TOC() { return ''; }

    function toc_additem($id, $text, $level) {}

    function header($text, $level, $pos) {}

    function section_edit($start, $end, $level, $name) {}

    function section_open($level) {}

    function section_close() {}

    function cdata($text) {}

    function p_open() {}

    function p_close() {}

    function linebreak() {}

    function hr() {}

    function strong_open() {}

    function strong_close() {}

    function emphasis_open() {}

    function emphasis_close() {}

    function underline_open() {}

    function underline_close() {}

    function monospace_open() {}

    function monospace_close() {}

    function subscript_open() {}

    function subscript_close() {}

    function superscript_open() {}

    function superscript_close() {}

    function deleted_open() {}

    function deleted_close() {}

    function footnote_open() {}

    function footnote_close() {}

    function listu_open() {}

    function listu_close() {}

    function listo_open() {}

    function listo_close() {}

    function listitem_open($level) {}

    function listitem_close() {}

    function listcontent_open() {}

    function listcontent_close() {}

    function unformatted($text) {}

    function php($text) {}

    function phpblock($text) {}

    function html($text) {}

    function htmlblock($text) {}

    function preformatted($text) {}

    function quote_open() {}

    function quote_close() {}

    function file($text, $lang = null, $file = null ) {}

    function code($text, $lang = null, $file = null ) {}

    function acronym($acronym) {}

    function smiley($smiley) {}

    function wordblock($word) {}

    function entity($entity) {}

    // 640x480 ($x=640, $y=480)
    function multiplyentity($x, $y) {}

    function singlequoteopening() {}

    function singlequoteclosing() {}

    function apostrophe() {}

    function doublequoteopening() {}

    function doublequoteclosing() {}

    // $link like 'SomePage'
    function camelcaselink($link) {}

    function locallink($hash, $name = NULL) {}

    // $link like 'wiki:syntax', $title could be an array (media)
    function internallink($link, $title = NULL) {}

    // $link is full URL with scheme, $title could be an array (media)
    function externallink($link, $title = NULL) {}

    function rss ($url,$params) {}

    // $link is the original link - probably not much use
    // $wikiName is an identifier for the wiki
    // $wikiUri is the URL fragment to append to some known URL
    function interwikilink($link, $title = NULL, $wikiName, $wikiUri) {}

    // Link to file on users OS, $title could be an array (media)
    function filelink($link, $title = NULL) {}

    // Link to a Windows share, $title could be an array (media)
    function windowssharelink($link, $title = NULL) {}

//  function email($address, $title = NULL) {}
    function emaillink($address, $name = NULL) {}

    function internalmedia ($src, $title=NULL, $align=NULL, $width=NULL,
                            $height=NULL, $cache=NULL, $linking=NULL) {}

    function externalmedia ($src, $title=NULL, $align=NULL, $width=NULL,
                            $height=NULL, $cache=NULL, $linking=NULL) {}

    function internalmedialink (
        $src,$title=NULL,$align=NULL,$width=NULL,$height=NULL,$cache=NULL
        ) {}

    function externalmedialink(
        $src,$title=NULL,$align=NULL,$width=NULL,$height=NULL,$cache=NULL
        ) {}

    function table_open($maxcols = NULL, $numrows = NULL){}

    function table_close(){}

    function tablerow_open(){}

    function tablerow_close(){}

    function tableheader_open($colspan = 1, $align = NULL, $rowspan = 1){}

    function tableheader_close(){}

    function tablecell_open($colspan = 1, $align = NULL, $rowspan = 1){}

    function tablecell_close(){}


    // util functions follow, you probably won't need to reimplement them


    /**
     * Removes any Namespace from the given name but keeps
     * casing and special chars
     *
     * When the name carries a '#hash' part, the hash alone is returned.
     * An empty result falls back to the title of $conf['start'].
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param  string $name page id, optionally with namespace and #hash
     * @return string
     */
    function _simpleTitle($name){
        global $conf;

        //if there is a hash we use the anchor name only
        // (array_pad keeps list() from reading a missing offset when there is no '#')
        list($name,$hash) = array_pad(explode('#',$name,2),2,null);
        if($hash) return $hash;

        //trim colons or slash of a namespace link
        $name = rtrim($name,':');
        if($conf['useslash'])
          $name = rtrim($name,'/');

        if($conf['useslash']){
            $nssep = '[:;/]';
        }else{
            $nssep = '[:;]';
        }
        $name = preg_replace('!.*'.$nssep.'!','',$name);

        if(!$name) return $this->_simpleTitle($conf['start']);
        return $name;
    }

    /**
     * Resolve an interwikilink
     *
     * Unknown shortcuts fall back to a Google "I'm feeling lucky" search and
     * $shortcut is rewritten to 'go' (it is passed by reference).
     *
     * @param  string $shortcut  interwiki shortcut, may be changed to 'go'
     * @param  string $reference the part after the shortcut, optionally with #hash
     * @return string the resolved URL
     */
    function _resolveInterWiki(&$shortcut,$reference){
        //get interwiki URL
        if ( isset($this->interwiki[$shortcut]) ) {
            $url = $this->interwiki[$shortcut];
        } else {
            // Default to Google I'm feeling lucky
            $url = 'http://www.google.com/search?q={URL}&btnI=lucky';
            $shortcut = 'go';
        }

        //split into hash and url part
        list($reference,$hash) = array_pad(explode('#',$reference,2),2,null);

        //replace placeholder
        if(preg_match('#\{(URL|NAME|SCHEME|HOST|PORT|PATH|QUERY)\}#',$url)){
            //use placeholders
            $url = str_replace('{URL}',rawurlencode($reference),$url);
            $url = str_replace('{NAME}',$reference,$url);

            // parse_url() only returns the components that are present (or
            // false on seriously malformed input), so supply empty defaults
            $parsed = parse_url($reference);
            if(!is_array($parsed)) $parsed = array();
            $parsed = array_merge(
                array('scheme'=>'','host'=>'','port'=>80,'path'=>'','query'=>''),
                $parsed
            );
            if(!$parsed['port']) $parsed['port'] = 80;

            $url = str_replace('{SCHEME}',$parsed['scheme'],$url);
            $url = str_replace('{HOST}',$parsed['host'],$url);
            $url = str_replace('{PORT}',$parsed['port'],$url);
            $url = str_replace('{PATH}',$parsed['path'],$url);
            $url = str_replace('{QUERY}',$parsed['query'],$url);
        }else{
            //default
            $url = $url.rawurlencode($reference);
        }
        if($hash) $url .= '#'.rawurlencode($hash);

        return $url;
    }
}


//Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/lib/dokuwiki/inc/parser/xhtml.php b/lib/dokuwiki/inc/parser/xhtml.php new file mode 100644 index 000000000..4d5333f7a --- /dev/null +++ b/lib/dokuwiki/inc/parser/xhtml.php @@ -0,0 +1,1142 @@
<?php
/**
 * Renderer for XHTML output
 *
 * @author Harry Fuecks
<hfuecks@gmail.com> + * @author Andreas Gohr <andi@splitbrain.org> + */ +if(!defined('DOKU_INC')) die('meh.'); + +if ( !defined('DOKU_LF') ) { + // Some whitespace to help View > Source + define ('DOKU_LF',"\n"); +} + +if ( !defined('DOKU_TAB') ) { + // Some whitespace to help View > Source + define ('DOKU_TAB',"\t"); +} + +require_once DOKU_INC . 'inc/parser/renderer.php'; +require_once DOKU_INC . 'inc/html.php'; + +/** + * The Renderer + */ +class Doku_Renderer_xhtml extends Doku_Renderer { + + // @access public + var $doc = ''; // will contain the whole document + var $toc = array(); // will contain the Table of Contents + + + var $headers = array(); + var $footnotes = array(); + var $lastlevel = 0; + var $node = array(0,0,0,0,0); + var $store = ''; + + var $_counter = array(); // used as global counter, introduced for table classes + var $_codeblock = 0; // counts the code and file blocks, used to provide download links + + function getFormat(){ + return 'xhtml'; + } + + + function document_start() { + //reset some internals + $this->toc = array(); + $this->headers = array(); + } + + function document_end() { + if ( count ($this->footnotes) > 0 ) { + $this->doc .= '<div class="footnotes">'.DOKU_LF; + + $id = 0; + foreach ( $this->footnotes as $footnote ) { + $id++; // the number of the current footnote + + // check its not a placeholder that indicates actual footnote text is elsewhere + if (substr($footnote, 0, 5) != "@@FNT") { + + // open the footnote and set the anchor and backlink + $this->doc .= '<div class="fn">'; + $this->doc .= '<sup><a href="#fnt__'.$id.'" id="fn__'.$id.'" name="fn__'.$id.'" class="fn_bot">'; + $this->doc .= $id.')</a></sup> '.DOKU_LF; + + // get any other footnotes that use the same markup + $alt = array_keys($this->footnotes, "@@FNT$id"); + + if (count($alt)) { + foreach ($alt as $ref) { + // set anchor and backlink for the other footnotes + $this->doc .= ', <sup><a href="#fnt__'.($ref+1).'" id="fn__'.($ref+1).'" 
name="fn__'.($ref+1).'" class="fn_bot">'; + $this->doc .= ($ref+1).')</a></sup> '.DOKU_LF; + } + } + + // add footnote markup and close this footnote + $this->doc .= $footnote; + $this->doc .= '</div>' . DOKU_LF; + } + } + $this->doc .= '</div>'.DOKU_LF; + } + + // Prepare the TOC + global $conf; + if($this->info['toc'] && is_array($this->toc) && $conf['tocminheads'] && count($this->toc) >= $conf['tocminheads']){ + global $TOC; + $TOC = $this->toc; + } + + // make sure there are no empty paragraphs + $this->doc = preg_replace('#<p>\s*</p>#','',$this->doc); + } + + function toc_additem($id, $text, $level) { + global $conf; + + //handle TOC + if($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']){ + $this->toc[] = html_mktocitem($id, $text, $level-$conf['toptoclevel']+1); + } + } + + function header($text, $level, $pos) { + if(!$text) return; //skip empty headlines + + $hid = $this->_headerToLink($text,true); + + //only add items within configured levels + $this->toc_additem($hid, $text, $level); + + // adjust $node to reflect hierarchy of levels + $this->node[$level-1]++; + if ($level < $this->lastlevel) { + for ($i = 0; $i < $this->lastlevel-$level; $i++) { + $this->node[$this->lastlevel-$i-1] = 0; + } + } + $this->lastlevel = $level; + + // write the header + $this->doc .= DOKU_LF.'<h'.$level.'><a name="'.$hid.'" id="'.$hid.'">'; + $this->doc .= $this->_xmlEntities($text); + $this->doc .= "</a></h$level>".DOKU_LF; + } + + /** + * Section edit marker is replaced by an edit button when + * the page is editable. 
Replacement done in 'inc/html.php#html_secedit' + * + * @author Andreas Gohr <andi@splitbrain.org> + * @author Ben Coburn <btcoburn@silicodon.net> + */ + function section_edit($start, $end, $level, $name) { + global $conf; + + if ($start!=-1 && $level<=$conf['maxseclevel']) { + $name = str_replace('"', '', $name); + $this->doc .= '<!-- SECTION "'.$name.'" ['.$start.'-'.(($end===0)?'':$end).'] -->'; + } + } + + function section_open($level) { + $this->doc .= "<div class=\"level$level\">".DOKU_LF; + } + + function section_close() { + $this->doc .= DOKU_LF.'</div>'.DOKU_LF; + } + + function cdata($text) { + $this->doc .= $this->_xmlEntities($text); + } + + function p_open() { + $this->doc .= DOKU_LF.'<p>'.DOKU_LF; + } + + function p_close() { + $this->doc .= DOKU_LF.'</p>'.DOKU_LF; + } + + function linebreak() { + $this->doc .= '<br/>'.DOKU_LF; + } + + function hr() { + $this->doc .= '<hr />'.DOKU_LF; + } + + function strong_open() { + $this->doc .= '<strong>'; + } + + function strong_close() { + $this->doc .= '</strong>'; + } + + function emphasis_open() { + $this->doc .= '<em>'; + } + + function emphasis_close() { + $this->doc .= '</em>'; + } + + function underline_open() { + $this->doc .= '<em class="u">'; + } + + function underline_close() { + $this->doc .= '</em>'; + } + + function monospace_open() { + $this->doc .= '<code>'; + } + + function monospace_close() { + $this->doc .= '</code>'; + } + + function subscript_open() { + $this->doc .= '<sub>'; + } + + function subscript_close() { + $this->doc .= '</sub>'; + } + + function superscript_open() { + $this->doc .= '<sup>'; + } + + function superscript_close() { + $this->doc .= '</sup>'; + } + + function deleted_open() { + $this->doc .= '<del>'; + } + + function deleted_close() { + $this->doc .= '</del>'; + } + + /** + * Callback for footnote start syntax + * + * All following content will go to the footnote instead of + * the document. 
To achieve this the previous rendered content + * is moved to $store and $doc is cleared + * + * @author Andreas Gohr <andi@splitbrain.org> + */ + function footnote_open() { + + // move current content to store and record footnote + $this->store = $this->doc; + $this->doc = ''; + } + + /** + * Callback for footnote end syntax + * + * All rendered content is moved to the $footnotes array and the old + * content is restored from $store again + * + * @author Andreas Gohr + */ + function footnote_close() { + + // recover footnote into the stack and restore old content + $footnote = $this->doc; + $this->doc = $this->store; + $this->store = ''; + + // check to see if this footnote has been seen before + $i = array_search($footnote, $this->footnotes); + + if ($i === false) { + // its a new footnote, add it to the $footnotes array + $id = count($this->footnotes)+1; + $this->footnotes[count($this->footnotes)] = $footnote; + } else { + // seen this one before, translate the index to an id and save a placeholder + $i++; + $id = count($this->footnotes)+1; + $this->footnotes[count($this->footnotes)] = "@@FNT".($i); + } + + // output the footnote reference and link + $this->doc .= '<sup><a href="#fn__'.$id.'" name="fnt__'.$id.'" id="fnt__'.$id.'" class="fn_top">'.$id.')</a></sup>'; + } + + function listu_open() { + $this->doc .= '<ul>'.DOKU_LF; + } + + function listu_close() { + $this->doc .= '</ul>'.DOKU_LF; + } + + function listo_open() { + $this->doc .= '<ol>'.DOKU_LF; + } + + function listo_close() { + $this->doc .= '</ol>'.DOKU_LF; + } + + function listitem_open($level) { + $this->doc .= '<li class="level'.$level.'">'; + } + + function listitem_close() { + $this->doc .= '</li>'.DOKU_LF; + } + + function listcontent_open() { + $this->doc .= '<div class="li">'; + } + + function listcontent_close() { + $this->doc .= '</div>'.DOKU_LF; + } + + function unformatted($text) { + $this->doc .= $this->_xmlEntities($text); + } + + /** + * Execute PHP code if allowed + * + * @param 
string $wrapper html element to wrap result if $conf['phpok'] is okff + * + * @author Andreas Gohr <andi@splitbrain.org> + */ + function php($text, $wrapper='code') { + global $conf; + + if($conf['phpok']){ + ob_start(); + eval($text); + $this->doc .= ob_get_contents(); + ob_end_clean(); + } else { + $this->doc .= p_xhtml_cached_geshi($text, 'php', $wrapper); + } + } + + function phpblock($text) { + $this->php($text, 'pre'); + } + + /** + * Insert HTML if allowed + * + * @param string $wrapper html element to wrap result if $conf['htmlok'] is okff + * + * @author Andreas Gohr <andi@splitbrain.org> + */ + function html($text, $wrapper='code') { + global $conf; + + if($conf['htmlok']){ + $this->doc .= $text; + } else { + $this->doc .= p_xhtml_cached_geshi($text, 'html4strict', $wrapper); + } + } + + function htmlblock($text) { + $this->html($text, 'pre'); + } + + function quote_open() { + $this->doc .= '<blockquote><div class="no">'.DOKU_LF; + } + + function quote_close() { + $this->doc .= '</div></blockquote>'.DOKU_LF; + } + + function preformatted($text) { + $this->doc .= '<pre class="code">' . trim($this->_xmlEntities($text),"\n\r") . '</pre>'. 
DOKU_LF; + } + + function file($text, $language=null, $filename=null) { + $this->_highlight('file',$text,$language,$filename); + } + + function code($text, $language=null, $filename=null) { + $this->_highlight('code',$text,$language,$filename); + } + + /** + * Use GeSHi to highlight language syntax in code and file blocks + * + * @author Andreas Gohr <andi@splitbrain.org> + */ + function _highlight($type, $text, $language=null, $filename=null) { + global $conf; + global $ID; + global $lang; + + if($filename){ + // add icon + list($ext) = mimetype($filename,false); + $class = preg_replace('/[^_\-a-z0-9]+/i','_',$ext); + $class = 'mediafile mf_'.$class; + + $this->doc .= '<dl class="'.$type.'">'.DOKU_LF; + $this->doc .= '<dt><a href="'.exportlink($ID,'code',array('codeblock'=>$this->_codeblock)).'" title="'.$lang['download'].'" class="'.$class.'">'; + $this->doc .= hsc($filename); + $this->doc .= '</a></dt>'.DOKU_LF.'<dd>'; + } + + if ( is_null($language) ) { + $this->doc .= '<pre class="'.$type.'">'.$this->_xmlEntities($text).'</pre>'.DOKU_LF; + } else { + $class = 'code'; //we always need the code class to make the syntax highlighting apply + if($type != 'code') $class .= ' '.$type; + + $this->doc .= "<pre class=\"$class $language\">".p_xhtml_cached_geshi($text, $language, '').'</pre>'.DOKU_LF; + } + + if($filename){ + $this->doc .= '</dd></dl>'.DOKU_LF; + } + + $this->_codeblock++; + } + + function acronym($acronym) { + + if ( array_key_exists($acronym, $this->acronyms) ) { + + $title = $this->_xmlEntities($this->acronyms[$acronym]); + + $this->doc .= '<acronym title="'.$title + .'">'.$this->_xmlEntities($acronym).'</acronym>'; + + } else { + $this->doc .= $this->_xmlEntities($acronym); + } + } + + function smiley($smiley) { + if ( array_key_exists($smiley, $this->smileys) ) { + $title = $this->_xmlEntities($this->smileys[$smiley]); + $this->doc .= '<img src="'.DOKU_BASE.'lib/images/smileys/'.$this->smileys[$smiley]. + '" class="middle" alt="'. 
+ $this->_xmlEntities($smiley).'" />'; + } else { + $this->doc .= $this->_xmlEntities($smiley); + } + } + + /* + * not used + function wordblock($word) { + if ( array_key_exists($word, $this->badwords) ) { + $this->doc .= '** BLEEP **'; + } else { + $this->doc .= $this->_xmlEntities($word); + } + } + */ + + function entity($entity) { + if ( array_key_exists($entity, $this->entities) ) { + $this->doc .= $this->entities[$entity]; + } else { + $this->doc .= $this->_xmlEntities($entity); + } + } + + function multiplyentity($x, $y) { + $this->doc .= "$x×$y"; + } + + function singlequoteopening() { + global $lang; + $this->doc .= $lang['singlequoteopening']; + } + + function singlequoteclosing() { + global $lang; + $this->doc .= $lang['singlequoteclosing']; + } + + function apostrophe() { + global $lang; + $this->doc .= $lang['apostrophe']; + } + + function doublequoteopening() { + global $lang; + $this->doc .= $lang['doublequoteopening']; + } + + function doublequoteclosing() { + global $lang; + $this->doc .= $lang['doublequoteclosing']; + } + + /** + */ + function camelcaselink($link) { + $this->internallink($link,$link); + } + + + function locallink($hash, $name = NULL){ + global $ID; + $name = $this->_getLinkTitle($name, $hash, $isImage); + $hash = $this->_headerToLink($hash); + $title = $ID.' 
↵'; + $this->doc .= '<a href="#'.$hash.'" title="'.$title.'" class="wikilink1">'; + $this->doc .= $name; + $this->doc .= '</a>'; + } + + /** + * Render an internal Wiki Link + * + * $search,$returnonly & $linktype are not for the renderer but are used + * elsewhere - no need to implement them in other renderers + * + * @author Andreas Gohr <andi@splitbrain.org> + */ + function internallink($id, $name = NULL, $search=NULL,$returnonly=false,$linktype='content') { + global $conf; + global $ID; + // default name is based on $id as given + $default = $this->_simpleTitle($id); + + // now first resolve and clean up the $id + resolve_pageid(getNS($ID),$id,$exists); + $name = $this->_getLinkTitle($name, $default, $isImage, $id, $linktype); + if ( !$isImage ) { + if ( $exists ) { + $class='wikilink1'; + } else { + $class='wikilink2'; + $link['rel']='nofollow'; + } + } else { + $class='media'; + } + + //keep hash anchor + list($id,$hash) = explode('#',$id,2); + if(!empty($hash)) $hash = $this->_headerToLink($hash); + + //prepare for formating + $link['target'] = $conf['target']['wiki']; + $link['style'] = ''; + $link['pre'] = ''; + $link['suf'] = ''; + // highlight link to current page + if ($id == $ID) { + $link['pre'] = '<span class="curid">'; + $link['suf'] = '</span>'; + } + $link['more'] = ''; + $link['class'] = $class; + $link['url'] = wl($id); + $link['name'] = $name; + $link['title'] = $id; + //add search string + if($search){ + ($conf['userewrite']) ? $link['url'].='?' 
: $link['url'].='&'; + if(is_array($search)){ + $search = array_map('rawurlencode',$search); + $link['url'] .= 's[]='.join('&s[]=',$search); + }else{ + $link['url'] .= 's='.rawurlencode($search); + } + } + + //keep hash + if($hash) $link['url'].='#'.$hash; + + //output formatted + if($returnonly){ + return $this->_formatLink($link); + }else{ + $this->doc .= $this->_formatLink($link); + } + } + + function externallink($url, $name = NULL) { + global $conf; + + $name = $this->_getLinkTitle($name, $url, $isImage); + + if ( !$isImage ) { + $class='urlextern'; + } else { + $class='media'; + } + + //prepare for formating + $link['target'] = $conf['target']['extern']; + $link['style'] = ''; + $link['pre'] = ''; + $link['suf'] = ''; + $link['more'] = ''; + $link['class'] = $class; + $link['url'] = $url; + + $link['name'] = $name; + $link['title'] = $this->_xmlEntities($url); + if($conf['relnofollow']) $link['more'] .= ' rel="nofollow"'; + + //output formatted + $this->doc .= $this->_formatLink($link); + } + + /** + */ + function interwikilink($match, $name = NULL, $wikiName, $wikiUri) { + global $conf; + + $link = array(); + $link['target'] = $conf['target']['interwiki']; + $link['pre'] = ''; + $link['suf'] = ''; + $link['more'] = ''; + $link['name'] = $this->_getLinkTitle($name, $wikiUri, $isImage); + + //get interwiki URL + $url = $this->_resolveInterWiki($wikiName,$wikiUri); + + if ( !$isImage ) { + $class = preg_replace('/[^_\-a-z0-9]+/i','_',$wikiName); + $link['class'] = "interwiki iw_$class"; + } else { + $link['class'] = 'media'; + } + + //do we stay at the same server? 
Use local target + if( strpos($url,DOKU_URL) === 0 ){ + $link['target'] = $conf['target']['wiki']; + } + + $link['url'] = $url; + $link['title'] = htmlspecialchars($link['url']); + + //output formatted + $this->doc .= $this->_formatLink($link); + } + + /** + */ + function windowssharelink($url, $name = NULL) { + global $conf; + global $lang; + //simple setup + $link['target'] = $conf['target']['windows']; + $link['pre'] = ''; + $link['suf'] = ''; + $link['style'] = ''; + + $link['name'] = $this->_getLinkTitle($name, $url, $isImage); + if ( !$isImage ) { + $link['class'] = 'windows'; + } else { + $link['class'] = 'media'; + } + + + $link['title'] = $this->_xmlEntities($url); + $url = str_replace('\\','/',$url); + $url = 'file:///'.$url; + $link['url'] = $url; + + //output formatted + $this->doc .= $this->_formatLink($link); + } + + function emaillink($address, $name = NULL) { + global $conf; + //simple setup + $link = array(); + $link['target'] = ''; + $link['pre'] = ''; + $link['suf'] = ''; + $link['style'] = ''; + $link['more'] = ''; + + $name = $this->_getLinkTitle($name, '', $isImage); + if ( !$isImage ) { + $link['class']='mail JSnocheck'; + } else { + $link['class']='media JSnocheck'; + } + + $address = $this->_xmlEntities($address); + $address = obfuscate($address); + $title = $address; + + if(empty($name)){ + $name = $address; + } + + if($conf['mailguard'] == 'visible') $address = rawurlencode($address); + + $link['url'] = 'mailto:'.$address; + $link['name'] = $name; + $link['title'] = $title; + + //output formatted + $this->doc .= $this->_formatLink($link); + } + + function internalmedia ($src, $title=NULL, $align=NULL, $width=NULL, + $height=NULL, $cache=NULL, $linking=NULL) { + global $ID; + list($src,$hash) = explode('#',$src,2); + resolve_mediaid(getNS($ID),$src, $exists); + + $noLink = false; + $render = ($linking == 'linkonly') ? 
false : true; + $link = $this->_getMediaLinkConf($src, $title, $align, $width, $height, $cache, $render); + + list($ext,$mime,$dl) = mimetype($src,false); + if(substr($mime,0,5) == 'image' && $render){ + $link['url'] = ml($src,array('id'=>$ID,'cache'=>$cache),($linking=='direct')); + }elseif($mime == 'application/x-shockwave-flash' && $render){ + // don't link flash movies + $noLink = true; + }else{ + // add file icons + $class = preg_replace('/[^_\-a-z0-9]+/i','_',$ext); + $link['class'] .= ' mediafile mf_'.$class; + $link['url'] = ml($src,array('id'=>$ID,'cache'=>$cache),true); + } + + if($hash) $link['url'] .= '#'.$hash; + + //markup non existing files + if (!$exists) + $link['class'] .= ' wikilink2'; + + //output formatted + if ($linking == 'nolink' || $noLink) $this->doc .= $link['name']; + else $this->doc .= $this->_formatLink($link); + } + + function externalmedia ($src, $title=NULL, $align=NULL, $width=NULL, + $height=NULL, $cache=NULL, $linking=NULL) { + list($src,$hash) = explode('#',$src,2); + $noLink = false; + $render = ($linking == 'linkonly') ? 
false : true; + $link = $this->_getMediaLinkConf($src, $title, $align, $width, $height, $cache, $render); + + $link['url'] = ml($src,array('cache'=>$cache)); + + list($ext,$mime,$dl) = mimetype($src,false); + if(substr($mime,0,5) == 'image' && $render){ + // link only jpeg images + // if ($ext != 'jpg' && $ext != 'jpeg') $noLink = true; + }elseif($mime == 'application/x-shockwave-flash' && $render){ + // don't link flash movies + $noLink = true; + }else{ + // add file icons + $class = preg_replace('/[^_\-a-z0-9]+/i','_',$ext); + $link['class'] .= ' mediafile mf_'.$class; + } + + if($hash) $link['url'] .= '#'.$hash; + + //output formatted + if ($linking == 'nolink' || $noLink) $this->doc .= $link['name']; + else $this->doc .= $this->_formatLink($link); + } + + /** + * Renders an RSS feed + * + * @author Andreas Gohr <andi@splitbrain.org> + */ + function rss ($url,$params){ + global $lang; + global $conf; + + require_once(DOKU_INC.'inc/FeedParser.php'); + $feed = new FeedParser(); + $feed->set_feed_url($url); + + //disable warning while fetching + if (!defined('DOKU_E_LEVEL')) { $elvl = error_reporting(E_ERROR); } + $rc = $feed->init(); + if (!defined('DOKU_E_LEVEL')) { error_reporting($elvl); } + + //decide on start and end + if($params['reverse']){ + $mod = -1; + $start = $feed->get_item_quantity()-1; + $end = $start - ($params['max']); + $end = ($end < -1) ? -1 : $end; + }else{ + $mod = 1; + $start = 0; + $end = $feed->get_item_quantity(); + $end = ($end > $params['max']) ? 
$params['max'] : $end;; + } + + $this->doc .= '<ul class="rss">'; + if($rc){ + for ($x = $start; $x != $end; $x += $mod) { + $item = $feed->get_item($x); + $this->doc .= '<li><div class="li">'; + // support feeds without links + $lnkurl = $item->get_permalink(); + if($lnkurl){ + // title is escaped by SimplePie, we unescape here because it + // is escaped again in externallink() FS#1705 + $this->externallink($item->get_permalink(), + htmlspecialchars_decode($item->get_title())); + }else{ + $this->doc .= ' '.$item->get_title(); + } + if($params['author']){ + $author = $item->get_author(0); + if($author){ + $name = $author->get_name(); + if(!$name) $name = $author->get_email(); + if($name) $this->doc .= ' '.$lang['by'].' '.$name; + } + } + if($params['date']){ + $this->doc .= ' ('.$item->get_local_date($conf['dformat']).')'; + } + if($params['details']){ + $this->doc .= '<div class="detail">'; + if($conf['htmlok']){ + $this->doc .= $item->get_description(); + }else{ + $this->doc .= strip_tags($item->get_description()); + } + $this->doc .= '</div>'; + } + + $this->doc .= '</div></li>'; + } + }else{ + $this->doc .= '<li><div class="li">'; + $this->doc .= '<em>'.$lang['rssfailed'].'</em>'; + $this->externallink($url); + if($conf['allowdebug']){ + $this->doc .= '<!--'.hsc($feed->error).'-->'; + } + $this->doc .= '</div></li>'; + } + $this->doc .= '</ul>'; + } + + // $numrows not yet implemented + function table_open($maxcols = NULL, $numrows = NULL){ + // initialize the row counter used for classes + $this->_counter['row_counter'] = 0; + $this->doc .= '<table class="inline">'.DOKU_LF; + } + + function table_close(){ + $this->doc .= '</table>'.DOKU_LF; + } + + function tablerow_open(){ + // initialize the cell counter used for classes + $this->_counter['cell_counter'] = 0; + $class = 'row' . $this->_counter['row_counter']++; + $this->doc .= DOKU_TAB . '<tr class="'.$class.'">' . DOKU_LF . DOKU_TAB . DOKU_TAB; + } + + function tablerow_close(){ + $this->doc .= DOKU_LF . 
DOKU_TAB . '</tr>' . DOKU_LF; + } + + function tableheader_open($colspan = 1, $align = NULL, $rowspan = 1){ + $class = 'class="col' . $this->_counter['cell_counter']++; + if ( !is_null($align) ) { + $class .= ' '.$align.'align'; + } + $class .= '"'; + $this->doc .= '<th ' . $class; + if ( $colspan > 1 ) { + $this->_counter['cell_counter'] += $colspan-1; + $this->doc .= ' colspan="'.$colspan.'"'; + } + if ( $rowspan > 1 ) { + $this->doc .= ' rowspan="'.$rowspan.'"'; + } + $this->doc .= '>'; + } + + function tableheader_close(){ + $this->doc .= '</th>'; + } + + function tablecell_open($colspan = 1, $align = NULL, $rowspan = 1){ + $class = 'class="col' . $this->_counter['cell_counter']++; + if ( !is_null($align) ) { + $class .= ' '.$align.'align'; + } + $class .= '"'; + $this->doc .= '<td '.$class; + if ( $colspan > 1 ) { + $this->_counter['cell_counter'] += $colspan-1; + $this->doc .= ' colspan="'.$colspan.'"'; + } + if ( $rowspan > 1 ) { + $this->doc .= ' rowspan="'.$rowspan.'"'; + } + $this->doc .= '>'; + } + + function tablecell_close(){ + $this->doc .= '</td>'; + } + + //---------------------------------------------------------- + // Utils + + /** + * Build a link + * + * Assembles all parts defined in $link returns HTML for the link + * + * @author Andreas Gohr <andi@splitbrain.org> + */ + function _formatLink($link){ + //make sure the url is XHTML compliant (skip mailto) + if(substr($link['url'],0,7) != 'mailto:'){ + $link['url'] = str_replace('&','&',$link['url']); + $link['url'] = str_replace('&amp;','&',$link['url']); + } + //remove double encodings in titles + $link['title'] = str_replace('&amp;','&',$link['title']); + + // be sure there are no bad chars in url or title + // (we can't do this for name because it can contain an img tag) + $link['url'] = strtr($link['url'],array('>'=>'%3E','<'=>'%3C','"'=>'%22')); + $link['title'] = strtr($link['title'],array('>'=>'>','<'=>'<','"'=>'"')); + + $ret = ''; + $ret .= $link['pre']; + $ret .= '<a 
href="'.$link['url'].'"'; + if(!empty($link['class'])) $ret .= ' class="'.$link['class'].'"'; + if(!empty($link['target'])) $ret .= ' target="'.$link['target'].'"'; + if(!empty($link['title'])) $ret .= ' title="'.$link['title'].'"'; + if(!empty($link['style'])) $ret .= ' style="'.$link['style'].'"'; + if(!empty($link['rel'])) $ret .= ' rel="'.$link['rel'].'"'; + if(!empty($link['more'])) $ret .= ' '.$link['more']; + $ret .= '>'; + $ret .= $link['name']; + $ret .= '</a>'; + $ret .= $link['suf']; + return $ret; + } + + /** + * Renders internal and external media + * + * @author Andreas Gohr <andi@splitbrain.org> + */ + function _media ($src, $title=NULL, $align=NULL, $width=NULL, + $height=NULL, $cache=NULL, $render = true) { + + $ret = ''; + + list($ext,$mime,$dl) = mimetype($src); + if(substr($mime,0,5) == 'image'){ + // first get the $title + if (!is_null($title)) { + $title = $this->_xmlEntities($title); + }elseif($ext == 'jpg' || $ext == 'jpeg'){ + //try to use the caption from IPTC/EXIF + require_once(DOKU_INC.'inc/JpegMeta.php'); + $jpeg =& new JpegMeta(mediaFN($src)); + if($jpeg !== false) $cap = $jpeg->getTitle(); + if($cap){ + $title = $this->_xmlEntities($cap); + } + } + if (!$render) { + // if the picture is not supposed to be rendered + // return the title of the picture + if (!$title) { + // just show the sourcename + $title = $this->_xmlEntities(basename(noNS($src))); + } + return $title; + } + //add image tag + $ret .= '<img src="'.ml($src,array('w'=>$width,'h'=>$height,'cache'=>$cache)).'"'; + $ret .= ' class="media'.$align.'"'; + + // make left/right alignment for no-CSS view work (feeds) + if($align == 'right') $ret .= ' align="right"'; + if($align == 'left') $ret .= ' align="left"'; + + if ($title) { + $ret .= ' title="' . $title . '"'; + $ret .= ' alt="' . 
$title .'"'; + }else{ + $ret .= ' alt=""'; + } + + if ( !is_null($width) ) + $ret .= ' width="'.$this->_xmlEntities($width).'"'; + + if ( !is_null($height) ) + $ret .= ' height="'.$this->_xmlEntities($height).'"'; + + $ret .= ' />'; + + }elseif($mime == 'application/x-shockwave-flash'){ + if (!$render) { + // if the flash is not supposed to be rendered + // return the title of the flash + if (!$title) { + // just show the sourcename + $title = basename(noNS($src)); + } + return $this->_xmlEntities($title); + } + + $att = array(); + $att['class'] = "media$align"; + if($align == 'right') $att['align'] = 'right'; + if($align == 'left') $att['align'] = 'left'; + $ret .= html_flashobject(ml($src,array('cache'=>$cache)),$width,$height, + array('quality' => 'high'), + null, + $att, + $this->_xmlEntities($title)); + }elseif($title){ + // well at least we have a title to display + $ret .= $this->_xmlEntities($title); + }else{ + // just show the sourcename + $ret .= $this->_xmlEntities(basename(noNS($src))); + } + + return $ret; + } + + function _xmlEntities($string) { + return htmlspecialchars($string,ENT_QUOTES,'UTF-8'); + } + + /** + * Creates a linkid from a headline + * + * @param string $title The headline title + * @param boolean $create Create a new unique ID? 
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    function _headerToLink($title,$create=false) {
+        if($create){
+            // sectionID() is given the renderer's own header list here
+            return sectionID($title,$this->headers);
+        }else{
+            // a throw-away variable is passed instead of a literal
+            // NOTE(review): presumably sectionID() takes this argument by
+            // reference - confirm against its definition
+            $check = false;
+            return sectionID($title,$check);
+        }
+    }
+
+    /**
+     * Construct a title and handle images in titles
+     *
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     * @param string|array|null $title explicit title; an array is rendered
+     *                                 as an image through _imageTitle()
+     * @param string  $default  fallback text when neither a title nor a
+     *                          heading is available
+     * @param boolean $isImage  set to true when the title is an image,
+     *                          false otherwise
+     * @param string  $id       page whose first heading may stand in for an
+     *                          empty title
+     * @param string  $linktype passed to useHeading()
+     * @return string escaped text, or image markup when $isImage is true
+     */
+    function _getLinkTitle($title, $default, & $isImage, $id=NULL, $linktype='content') {
+        $isImage = false;
+        if ( is_array($title) ) {
+            $isImage = true;
+            return $this->_imageTitle($title);
+        } elseif ( is_null($title) || trim($title)=='') {
+            if (useHeading($linktype) && $id) {
+                $heading = p_get_first_heading($id,true);
+                if ($heading) {
+                    return $this->_xmlEntities($heading);
+                }
+            }
+            return $this->_xmlEntities($default);
+        } else {
+            return $this->_xmlEntities($title);
+        }
+    }
+
+    /**
+     * Returns an HTML code for images used in link titles
+     *
+     * @todo Resolve namespace on internal images
+     * @author Andreas Gohr <andi@splitbrain.org>
+     * @param array $img image description; the keys src, type, title, align,
+     *                   width, height and cache are read
+     * @return string whatever _media() renders for the image
+     */
+    function _imageTitle($img) {
+        global $ID;
+
+        // some fixes on $img['src']
+        // see internalmedia() and externalmedia()
+        // Keep only the part before a '#'. Indexing the explode() result
+        // (instead of list() with a second target) avoids an undefined-offset
+        // notice when the source contains no '#'.
+        $parts = explode('#',$img['src'],2);
+        $img['src'] = $parts[0];
+        if ($img['type'] == 'internalmedia') {
+            resolve_mediaid(getNS($ID),$img['src'],$exists);
+        }
+
+        return $this->_media($img['src'],
+                             $img['title'],
+                             $img['align'],
+                             $img['width'],
+                             $img['height'],
+                             $img['cache']);
+    }
+
+    /**
+     * _getMediaLinkConf is a helperfunction to internalmedia() and externalmedia()
+     * which returns a basic link to a media.
+     *
+     * @author Pierre Spring <pierre.spring@liip.ch>
+     * @param string $src
+     * @param string $title
+     * @param string $align
+     * @param string $width
+     * @param string $height
+     * @param string $cache
+     * @param bool   $render
+     * @access protected
+     * @return array link description with the keys class, style, pre, suf,
+     *               more, target, title and name
+     */
+    function _getMediaLinkConf($src, $title, $align, $width, $height, $cache, $render)
+    {
+        global $conf;
+
+        // 'title' is the escaped source, 'name' is whatever _media()
+        // renders for it; everything else is a fixed default
+        return array(
+            'class'  => 'media',
+            'style'  => '',
+            'pre'    => '',
+            'suf'    => '',
+            'more'   => '',
+            'target' => $conf['target']['media'],
+            'title'  => $this->_xmlEntities($src),
+            'name'   => $this->_media($src, $title, $align, $width, $height, $cache, $render),
+        );
+    }
+
+
+}
+
+//Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/lib/dokuwiki/inc/parser/xhtmlsummary.php b/lib/dokuwiki/inc/parser/xhtmlsummary.php
new file mode 100644
index 000000000..b187fef01
--- /dev/null
+++ b/lib/dokuwiki/inc/parser/xhtmlsummary.php
@@ -0,0 +1,90 @@
+<?php
+if(!defined('DOKU_INC')) die('meh.');
+require_once DOKU_INC . 'inc/parser/xhtml.php';
+
+/**
+ * The summary XHTML form selects either up to the first two paragraphs
+ * it finds in a page or the first section (whichever comes first)
+ * It strips out the table of contents if one exists
+ * Section divs are not used - everything should be nested in a single
+ * div with CSS class "page"
+ * Headings have their a name link removed and section editing links
+ * removed
+ * It also attempts to capture the first heading in a page for
+ * use as the title of the page.
+ *
+ *
+ * @author Harry Fuecks <hfuecks@gmail.com>
+ * @todo Is this currently used anywhere? Should it?
+ */
+class Doku_Renderer_xhtmlsummary extends Doku_Renderer_xhtml {
+
+    // Namespace these variables to
+    // avoid clashes with parent classes
+    var $sum_paragraphs = 0;     // paragraphs opened while still capturing
+    var $sum_capture = true;     // false once the summary is complete
+    var $sum_inSection = false;  // true after the first section was opened
+    var $sum_summary = '';       // the summary collected so far
+    var $sum_pageTitle = false;  // true once the first heading was recorded
+
+    /**
+     * Opens the single wrapping div
+     */
+    function document_start() {
+        $this->doc .= DOKU_LF.'<div>'.DOKU_LF;
+    }
+
+    /**
+     * Replaces the document with the collected summary and closes the div
+     */
+    function document_end() {
+        // If neither a closed section nor a second paragraph ended the
+        // capture, the content is still only in $this->doc. Flush it, or
+        // the opening <div> and the text would be dropped and a lone
+        // closing </div> emitted.
+        if ( $this->sum_capture ) {
+            $this->sum_summary .= $this->doc;
+            $this->sum_capture = false;
+        }
+        $this->doc = $this->sum_summary;
+        $this->doc .= DOKU_LF.'</div>'.DOKU_LF;
+    }
+
+    // FIXME not supported anymore
+    // saves what was rendered before the table of contents
+    function toc_open() {
+        $this->sum_summary .= $this->doc;
+    }
+
+    // FIXME not supported anymore
+    // throws away what was rendered since toc_open()
+    function toc_close() {
+        $this->doc = '';
+    }
+
+    /**
+     * Renders a plain heading and remembers the first one as page title
+     *
+     * The first heading text is stored in $this->info['sum_pagetitle'].
+     */
+    function header($text, $level, $pos) {
+        if ( !$this->sum_pageTitle ) {
+            $this->info['sum_pagetitle'] = $text;
+            $this->sum_pageTitle = true;
+        }
+        $this->doc .= DOKU_LF.'<h'.$level.'>';
+        $this->doc .= $this->_xmlEntities($text);
+        $this->doc .= "</h$level>".DOKU_LF;
+    }
+
+    /**
+     * Emits no markup; only notes that a section was entered
+     */
+    function section_open($level) {
+        if ( $this->sum_capture ) {
+            $this->sum_inSection = true;
+        }
+    }
+
+    /**
+     * Ends the capture when a section closes while still capturing
+     */
+    function section_close() {
+        if ( $this->sum_capture && $this->sum_inSection ) {
+            $this->sum_summary .= $this->doc;
+            $this->sum_capture = false;
+        }
+    }
+
+    /**
+     * Counts the paragraph (at most two) and renders it as usual
+     */
+    function p_open() {
+        if ( $this->sum_capture && $this->sum_paragraphs < 2 ) {
+            $this->sum_paragraphs++;
+        }
+        parent :: p_open();
+    }
+
+    /**
+     * Ends the capture once the second paragraph has been closed
+     */
+    function p_close() {
+        parent :: p_close();
+        if ( $this->sum_capture && $this->sum_paragraphs >= 2 ) {
+            $this->sum_summary .= $this->doc;
+            $this->sum_capture = false;
+        }
+    }
+
+}
+
+
+//Setup VIM: ex: et ts=2 enc=utf-8 : |