<?php
/**
 * DokuWiki Plugin cli (Syntax Component)
 *
 * Renders <cli ...> ... </cli> blocks as a command line conversation:
 * every line is classified as either program output, or as a
 * (prompt, command, comment) triplet, and is marked up accordingly.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author  Schplurtz le Déboulonné <schplurtz@laposte.net>
 * @author  Chris P. Jobling <C.P.Jobling@Swansea.ac.uk>
 * @author  Stephane Chazelas <stephane.chazelas@emerson.com>
 * @author  Andy Webber <dokuwiki@andywebber.com>
 */

// must be run within Dokuwiki
if (!defined('DOKU_INC')) die();

class syntax_plugin_cli extends DokuWiki_Syntax_Plugin {

    // Indexes into one entry of $stack / $current / $namedpcc[<name>].
    const PROMPT=0;   // regexp recognising the main prompt
    const CONT=1;     // regexp recognising the continuation prompt
    const COMMENT=2;  // regexp recognising the start of a comment
    const TYPE=3;     // value of the type attribute of the <cli>
    const STYLE=4;    // extra words of the <cli> tag, used as CSS classes

    // prompt, continue and comment stack. Entry 0 holds the defaults,
    // one more entry is pushed for every currently open <cli>.
    protected $stack;
    // settings of the innermost open <cli> (same layout as a stack entry)
    protected $current=array();
    // named prompt/continue/comment regexps read from the configuration:
    // $namedpcc[<name>][self::PROMPT|CONT|COMMENT] => regexp
    protected $namedpcc=array();
    // true once _init() has run
    protected $init=false;
    // html accumulated by render() between the outermost ENTER and EXIT
    protected $genhtml='';

    public function __construct() {
        // Nothing to do: init is delayed until we actually need to
        // parse some <cli>. See _init().
    }

    /**
     * @return string Syntax mode type
     */
    public function getType() {
        return 'protected';
    }

    /**
     * What about paragraph ?
     *
     * Because we want to nest without having an open paragraph when an inner
     * cli is closed, we lie. We will close and open paragraph ourselves.
     *
     * @return string Paragraph type
     */
    public function getPType() {
        return 'normal';
    }

    /**
     * @return int Sort order - Low numbers go before high numbers
     */
    public function getSort() {
        return 601;
    }

    /**
     * delayed initialization.
     *
     * Builds the default stack entry from the hardcoded regexps, overrides
     * it with the 'prompt', 'continue' and 'comment' settings when they are
     * not empty, and loads the named settings. Does nothing when called again.
     *
     * @return null
     */
    protected function _init() {
        if( $this->init ) return;
        // DokuWiki always loads and instanciates the plugin.
        // We don't want to load all this when the class is
        // loaded. Only when the syntax is really met and there
        // is need to parse should we do all this. It's not
        // even needed to render() a conversation.

        // hardcoded defaults
        // The comment regexp MUST enclose the whole delimiter in a capturing
        // group: _parseline() relies on PREG_SPLIT_DELIM_CAPTURE to get the
        // delimiter back. Without the group, the delimiter is lost and the
        // comment text is output twice.
        $this->stack=array(array('/^.{0,30}[$%>#](?:$|\\s)/', '/^.{0,30}>(?:$|\\s)/', '/((?:^#)|\\s#)/', '', ''));
        // override defaults with user config if exists.
        if(''!=($s=$this->getConf('prompt')))
            $this->stack[0][self::PROMPT]=$this->_toregexp($s);
        if(''!=($s=$this->getConf('continue')))
            $this->stack[0][self::CONT]=$this->_toregexp($s);
        if(''!=($s=$this->getConf('comment')))
            $this->stack[0][self::COMMENT]=$this->_toregexp($s, true);
        $this->_parsenamedparam($this->getConf('namedprompt'), self::PROMPT);
        $this->_parsenamedparam($this->getConf('namedcontinue'), self::CONT);
        $this->_parsenamedparam($this->getConf('namedcomment'), self::COMMENT);
        $this->init = true;
    }

    /**
     * override default accepts() method to allow nesting
     *
     * ie, to get the plugin accepts its own entry syntax
     *
     * @param string $mode Parser mode
     * @return bool true when $mode may appear inside this mode
     */
    public function accepts($mode) {
        // get_class() is "syntax_plugin_cli"; dropping the 7 chars of
        // "syntax_" leaves our own mode name.
        if ($mode == substr(get_class($this), 7)) return true;
        return parent::accepts($mode);
    }

    /**
     * Connect lookup pattern to lexer.
     *
     * @author Stephane Chazelas <stephane.chazelas@emerson.com>
     * @author Schplurtz le Déboulonné <schplurtz@laposte.net>
     * @param string $mode Parser mode
     */
    public function connectTo($mode) {
        // by the way, '<cli.*? >\r?\n?(?=.*?</cli>)' is the worst idea ever.
        $this->Lexer->addEntryPattern('<cli(?:[)]?' .
            '"(?:\\\\.|[^\\\\"])*"' .     /* double-quoted string */
            '|\'(?:\\\\.|[^\'\\\\])*\'' . /* single-quoted string */
            '|\\\\.' .                    /* escaped character */
            '|[^\'"\\\\>]|[(?:])*>\r?\n?'.
            '(?=.*?</cli>)'
        ,$mode,'plugin_cli');

        /*
         * The [)]? and |[(?:] is to work around a bug in lexer.php
         * wrt nested (...)
         */
    }

    /**
     * Connect exit pattern to lexer.
     *
     * @author Stephane Chazelas <stephane.chazelas@emerson.com>
     */
    public function postConnect() {
        $this->Lexer->addExitPattern('\r?\n?</cli>','plugin_cli');
    }

    /**
     * Handle matches of the cli syntax
     *
     * The returned array always starts with the lexer state, followed by:
     *  - ENTER     : nesting level (0 for the outermost cli), type, style
     *  - UNMATCHED : parsed lines (see _parse_conversation()), type, style
     *  - EXIT      : same nesting level as the matching ENTER, type, style
     *
     * @author Schplurtz le Déboulonné <Schplurtz@laposte.net>
     * @param string       $match   The match of the syntax
     * @param int          $state   The state of the handler
     * @param int          $pos     The position in the document
     * @param Doku_Handler $handler The handler
     * @return mixed[] data for render()
     */
    public function handle($match, $state, $pos, Doku_Handler $handler){
        switch ($state) {
        case DOKU_LEXER_ENTER :
            $this->_init();
            // number of <cli> already open
            $level=count($this->stack) - 1;
            $args = substr(rtrim($match), 4, -1); // strip '<cli' and '>EOL?'
            $params=$this->_parseparams($args);
            $type=$params['type'];
            $style=$params['style'];
            $this->current=array();
            // nested cli that only define style inherit prompts
            if( $level && !empty($style) && ! $type && ! $params['prompt'] && ! $params['continue'] && ! $params['comment'] ) {
                $last=end($this->stack);
                $this->current[self::PROMPT]=$last[self::PROMPT];
                $this->current[self::CONT]=$last[self::CONT];
                $this->current[self::COMMENT]=$last[self::COMMENT];
                $this->current[self::TYPE]=$last[self::TYPE];
            }
            else {
                $this->current[self::PROMPT]=$this->_pickregexp($params['prompt'], $type, self::PROMPT);
                $this->current[self::CONT]=$this->_pickregexp($params['continue'], $type, self::CONT);
                $this->current[self::COMMENT]=$this->_pickregexp($params['comment'], $type, self::COMMENT);
                $this->current[self::TYPE]=$type;
            }
            $this->current[self::STYLE]=$style;
            $this->stack[]=$this->current;
            // return nesting level and type and style
            return array($state, count($this->stack) - 2, $type, $style);
        case DOKU_LEXER_UNMATCHED :
            // return parsed conversation and type and style
            $top=end($this->stack);
            return array( $state, $this->_parse_conversation($match), $top[self::TYPE], $top[self::STYLE] );
        case DOKU_LEXER_EXIT :
            $top=array_pop($this->stack);
            $this->current=end($this->stack);
            // return same nested level as DOKU_LEXER_ENTER and type and style
            return array($state, count($this->stack) -1, $top[self::TYPE], $top[self::STYLE] );
        }
        return array(); //not reached
    }

    /**
     * choose the regexp to use for one kind of marker of a new <cli>.
     *
     * Order of precedence: the attribute given in the <cli> tag, then the
     * named configuration for $type if there is one, then the defaults.
     *
     * @param $explicit String|false attribute value from the <cli> tag
     * @param $type     String|false value of the type attribute
     * @param $kind     Int One of self::PROMPT, CONT, COMMENT
     * @return String The regexp
     */
    protected function _pickregexp($explicit, $type, $kind) {
        if( $explicit )
            return $this->_toregexp($explicit, $kind == self::COMMENT);
        // !empty() rather than a direct read: $type may well be a name
        // for which nothing (or not this kind) was configured.
        if( $type && !empty($this->namedpcc[$type][$kind]) )
            return $this->namedpcc[$type][$kind];
        return $this->stack[0][$kind];
    }

    /**
     * analyze the conversation.
     *
     * The conversation is split in lines and analyzed line by
     * line. If no prompt can be recognised on a line, then that
     * line is obviously a computer output and it is kept as it
     * is. Otherwise, the line is further split into (prompt,
     * input, comment) triplet. Input and comment may be empty.
     *
     * @author Schplurtz le Déboulonné <Schplurtz@laposte.net>
     * @author Andy Webber <dokuwiki@andywebber.com>
     * @param $txt String potentially multiline string
     * @return mixed[] array of String or Array
     */
    protected function _parse_conversation($txt) {
        $main_prompt=$this->current[self::PROMPT];
        $cont_prompt=$this->current[self::CONT];
        // "\r\n" must be tried first, or it would count as two line breaks.
        $lines = preg_split('/\r\n|\n\r|\n|\r/',$txt);
        // skip first and last line if they are empty
        if ( $lines && trim(reset($lines)) === '' ) array_shift($lines);
        if ( $lines && trim(end($lines)) === '' ) array_pop($lines);
        // continuation lines can only appear after a main-prompt line or continuation-line
        // but NOT as the first prompt. IE not after a line where there was no prompt.
        $prompt_continue=false;
        $parsed_lines=array();
        foreach($lines as $line) {
            if ($prompt_continue && preg_match($cont_prompt, $line, $matches)) {
                $parsed_lines[]=$this->_parseline( $line, $matches[0] );
                continue;
            }
            $prompt_continue=false;
            if (preg_match($main_prompt, $line, $matches)) {
                $prompt_continue=true;
                $parsed_lines[]=$this->_parseline( $line, $matches[0] );
                continue;
            }
            $parsed_lines[]=$line;
        }
        return $parsed_lines;
    }

    /**
     * split line in (prompt, command, comment) triplet.
     *
     * The comment regexp is expected to enclose the comment delimiter in
     * its first capturing group, so that the delimiter is kept as part of
     * the comment.
     *
     * @author Schplurtz le Déboulonné <Schplurtz@laposte.net>
     * @param $line   String the original line
     * @param $prompt String The current recognised prompt
     * @return String[] the 3 components of the line : prompt, command, comment
     */
    protected function _parseline( $line, $prompt ) {
        $comment='';
        $index=strlen($prompt);
        $comcom = substr( $line, $index );
        // $ar is ( command ) when there is no comment, otherwise
        // ( command, delimiter, [other captured groups...], rest of line ).
        $ar=preg_split($this->current[self::COMMENT], $comcom, 2, PREG_SPLIT_DELIM_CAPTURE);
        if( isset($ar[1]) ) {
            $comment=$ar[1].end($ar);
        }
        $ret=array( $prompt, $ar[0], $comment );
        return $ret;
    }

    /**
     * Render output. step by step generate html.
     * When generation is complete, check mode : if mode is xhtml, then
     * adds generate text to document. If mode is odt, then call odt renderer
     * that will convert html to odt.
     *
     * @param string        $mode     Renderer mode (supported modes: xhtml, odt, odt_pdf)
     * @param Doku_Renderer $renderer The renderer
     * @param array         $data     The data from the handler() function
     * @return bool If rendering was successful.
     */
    public function render($mode, Doku_Renderer $renderer, $data) {
        if($mode !== 'xhtml' && $mode !== 'odt' && $mode !== 'odt_pdf') {
            return false;
        }
        if( !is_array($data) || count($data) < 4 ) {
            return false;
        }
        list($state, $thing, $type, $style) = $data;
        // $type and $style are copied from the wiki text by handle() and
        // end up in a class attribute: they must be escaped.
        $classes = hsc((string)$type) . ' ' . hsc((string)$style);
        switch ($state) {
        case DOKU_LEXER_ENTER :
            // $thing is nesting level here.
            // for outer <cli>, initialize string.
            // for nested <cli>, add a div.
            if( 0 == $thing ) {
                $this->genhtml = '';
            }
            else {
                $this->genhtml .= "<div class='$classes'>";
                if( $mode != 'xhtml' ) // odt needs an additional CR. bug ?
                    $this->genhtml .= DOKU_LF;
            }
            break;
        case DOKU_LEXER_UNMATCHED :
            // Here $thing is an array of parsed lines as returned by _parse_conversation
            $not_first_line=false;
            foreach( $thing as $line ) {
                if($not_first_line)
                    $this->genhtml .= DOKU_LF;
                else
                    $not_first_line=true;
                if(is_array($line)) {
                    $this->genhtml .= '<span class="cli_prompt">' . hsc($line[0]) . "</span>";
                    if( '' != $line[1] )
                        $this->genhtml .= '<span class="cli_command">' . hsc($line[1]) . "</span>";
                    if( '' != $line[2] )
                        $this->genhtml .= '<span class="cli_comment">' . hsc($line[2]) . "</span>";
                } else {
                    $this->genhtml .= '<span class="cli_output">' . hsc($line) . "</span>";
                }
            }
            break;
        case DOKU_LEXER_EXIT :
            // $thing is nesting level here.
            // only close <pre> if we're closing the outermost <cli>
            if( 0 === $thing ) {
                if( $mode == 'xhtml' ) {
                    $renderer->doc .= "</p><pre class='cli $classes'>";
                    $renderer->doc .= $this->genhtml;
                    $renderer->doc .= '</pre><p>';
                }
                else {
                    if(!$renderer->styleExists('Command Line Interface')) {
                        $odtstyle=array(//FIXME: list of style properties is in lib/plugins/odt/ODT/styles/ODTParagraphStyle.php
                            'style-name' => 'Command Line Interface',
                            'style-display-name' => 'Command Line Interface',
                            'background-color' => $this->getConf('odtbackground'),
                            'border' => $this->getConf('odtborderwidth').' solid '.
                                        $this->getConf('odtbordercolor'),
                        );
                        $renderer->createParagraphStyle( $odtstyle );
                    }
                    $options=array();
                    // see https://github.com/LarsGit223/dokuwiki-plugin-odt/commit/19f42d58f1d97758a2ccbac38aae7253826eb59a
                    $options ['escape_content'] = 'false';
                    $options ['space'] = 'preserve';
                    $options ['media_selector'] = 'screen';
                    $options ['p_style'] = 'Command Line Interface';
                    $options ['element'] = 'pre';
                    $renderer->generateODTfromHTMLCode($this->genhtml, $options);
                }
            }
            else { // closing inner <cli>
                $this->genhtml .= '</div>';
                if( $mode != 'xhtml' ) // odt needs an additional CR. bug ?
                    $this->genhtml .= DOKU_LF;
            }
            break;
        }
        return true;
    }

    /**
     * parse named prompts or comments from config
     *
     * The configuration value holds one "name:value" definition per line.
     * Empty lines and lines without a colon are ignored.
     *
     * @author Schplurtz le Déboulonné <Schplurtz@laposte.net>
     * @param $s    String The configuration value
     * @param $kind Int One of self::PROMPT, CONT, COMMENT
     * @return void
     */
    protected function _parsenamedparam($s, $kind) {
        foreach(preg_split('/\r\n|\n\r|\n|\r/',(string)$s) as $line){
            if(''==$line)
                continue;
            // no "name:" part, there is nothing sensible to register.
            if(false === strpos($line, ':'))
                continue;
            list($nom,$val)=explode(':', $line, 2);
            $this->namedpcc[$nom][$kind]=$this->_toregexp($val, $kind == self::COMMENT);
        }
    }

    /**
     * transform a string or regexp into a regexp.
     *
     * The string is to match either a prompt
     * or a comment, and is thus anchored accordingly.
     *
     * A string that starts and ends with the same character among
     * / | = , ; % @ # is considered to already be a regexp. Any other
     * string is matched literally.
     *
     * @author Schplurtz le Déboulonné <Schplurtz@laposte.net>
     * @param $s             String The string to transform
     * @param $is_comment_re Boolean true if the re is going to match a comment.
     * @return String The regexp.
     */
    public function _toregexp( $s, $is_comment_re=false ) {
        if(preg_match('/^([\/|=,;%@#]).+(\1)$/', $s)) {
            // comment regexps must capture their delimiter, see _parseline()
            if( $is_comment_re )
                $s = $s[0] . '(' . substr( $s, 1, -1 ) . ')' . $s[0];
            return $s;
        }
        // preg_quote() copes with every character, including ^ and ]
        // which can not simply be wrapped in a [ ] character class.
        $quoted = preg_quote( $s, '/' );
        return $is_comment_re ? '/(' . $quoted . ')/' : '/^.*?' . $quoted . '/';
    }

    /**
     * tokenize a string. Could'nt find the correct set of regexps, So I use
     * a DFA.
     *
     * recognize bare word, =, \-escaped char, and single or double quoted strings.
     * ie «a\ b="foo\"bar"» produces "a b", '=', 'foo"bar'.
     * This function implements the following DFA. See dot(1) if you
     * need to visualize it. Or trust me and run something like that :
     * sed -ne '/digr''aph/,/}/s/ *.//p' syntax.php | dot -Tpng >/tmp/graph.png
     * The code has no state 8 nor 9: they take exactly the same arcs as
     * states 1 and 3 of the graph, so the tables go back to 1 and 3 instead.
     * digraph {
     *   node [shape=circle];
     *   0 -> 0 [label="\\s"]
     *   0 -> 1 [label="\""]
     *   0 -> 3 [label="'"]
     *   0 -> 6 [label="\\ [+]"]
     *   0 -> 7 [label="= [+]"]
     *   0 -> 5 [label=". [+]"]
     *
     *   1 -> 2 [label="\\ [+]"]
     *   1 -> 0 [label="\" [A]"]
     *   1 -> 1 [label=". [+]"]
     *   2 -> 1 [label="[\"\\] [-]"]
     *   2 -> 8 [label=". [+]"]
     *   8 -> 2 [label="\\ [+]"]
     *   8 -> 0 [label="\" [A]"]
     *   8 -> 1 [label=". [+]"]
     *
     *   3 -> 4 [label="\\ [+]"]
     *   3 -> 0 [label="' [A]"]
     *   3 -> 3 [label=". [+]"]
     *   4 -> 3 [label="['\\] [-]"]
     *   4 -> 9 [label=". [+]"]
     *   9 -> 4 [label="\\ [+]"]
     *   9 -> 0 [label="' [A]"]
     *   9 -> 3 [label=". [+]"]
     *
     *   5 -> 6 [label="\\ [+]"]
     *   5 -> 0 [label="\\s [A]"]
     *   5 -> 7 [label="= [A+]"]
     *   5 -> 1 [label="\" [A]"]
     *   5 -> 3 [label="' [A]"]
     *   5 -> 5 [label=". [+]"]
     *
     *   6 -> 5 [label="[\"' =\\>] [-]"]
     *   6 -> 5 [label=". [+]"]
     *
     *   7 -> 0 [label="\\s [A]"]
     *   7 -> 1 [label="\" [A]"]
     *   7 -> 3 [label="' [A]"]
     *   7 -> 6 [label="\\ [A+]"]
     *   7 -> 5 [label=". [A+]"]
     *   e [shape=box,label="arc label : current char [actions]\n+: add current char to token\n-: replace last char in token with current char\nA: Accept cur token. New token\nInitial state: 0\nValid end states : 0, 5, 7"]
     * }
     *
     * @author Schplurtz le Déboulonné
     * @param $str String The string to tokenize
     * @return String[] An array of tokens
     */
    protected function _tokenize( $str ) {
        $trs=array( //array ( current chart > new state,... )
            0 => array( ' ' => 0, "\t" => 0, '"' => 1, "'" => 3, '\\' => 6, '=' => 7, 'def' => 5 ),
            1 => array( '\\' => 2, '"' => 0, 'def' => 1 ),
            2 => array( 'def' => 1 ),
            3 => array( '\\' => 4, "'" => 0, 'def' => 3 ),
            4 => array( 'def' => 3 ),
            5 => array( '\\' => 6, ' ' => 0, "\t" => 0, '=' => 7, '"' => 1, "'" => 3, 'def' => 5),
            6 => array( 'def' => 5 ),
            7 => array( ' ' => 0, "\t" => 0, '"' => 1, "'" => 3, "\\" => 6, 'def' => 5),
        );
        $acs=array( // new state => action OR new state => array ( char => action, char => action... )
            0 => array( 6 => '+', 7 => '+', 5 => '+',),
            1 => array( 2 => '+', 0 => 'A', 1 => '+',),
            2 => array( 1 => array( '"' => '-', "\\" => '-', 'def' => '+')),
            3 => array( 4 => '+', 0 => 'A', 3 => '+',),
            4 => array( 3 => array( "'" => '-', "\\" => '-', 'def' => '+',)),
            5 => array( 6 => '+', 0 => 'A', 7 => 'A+', 1 => 'A', 3 => 'A', 5 => '+',),
            6 => array( 5 => array( '"' => '-', "'" => '-', ' ' => '-', '=' => '-', "\\" => '-', '>' => '-', 'def' =>'+'),),
            7 => array( 0 => 'A', 1 => 'A', 3 => 'A', 6 => 'A+', 5 => 'A+',),
        );

        $toks=array();
        $tok='';
        $state=0;
        foreach( str_split($str) as $c ) {
            $to = array_key_exists($c, $trs[$state]) ? $trs[$state][$c] : $trs[$state]['def'];
            // an arc that is not listed in $acs has no action attached.
            if( array_key_exists($to, $acs[$state]) ) {
                $action=$acs[$state][$to];
                if(is_array($action)) {
                    $action = array_key_exists($c, $action) ? $action[$c] : $action['def'];
                }
                switch($action) {
                case '+' : $tok .= $c; break;
                case '-' : $tok = substr($tok, 0, -1).$c; break;
                case 'A' : $toks[] = $tok; $tok=''; break;
                case 'A+' : $toks[] = $tok; $tok=$c; break;
                }
            }
            $state=$to;
        }
        // Text left over at the end of input is a token only if the
        // automaton stopped in a valid end state.
        if($tok !== '') {
            if ($state == 0 || $state == 5 || $state == 7)
                $toks[] = $tok;
            else
                msg( 'In <cli ...>, ignored malformed text «'.hsc($tok).'».', 2, '', '', MSG_USERS_ONLY );
        }

        if($this->getConf('debug'))
            msg( "line <pre>«".hsc($str)."»</pre>parsed as :<pre>".hsc(print_r($toks,1))."</pre>" );
        return $toks;
    }

    /**
     *
     * parse params of "<cli param...>" line
     *
     * param is expected to be a blank separated list of foo[=bar]
     * statement. foo must be an attribute name known to _map(); its
     * canonical name is the key of the returned array. When an attribute
     * is given several times, the last value wins and a message is issued.
     * Tokens that are not part of a foo=bar statement are appended to
     * 'style', each one preceded by a space.
     * For example, for «a=b prompt = "d" zorg», an error message is
     * issued for the unknown attribute a, and the returned array is
     * ( 'prompt' => 'd', 'continue' => false, 'comment' => false,
     * 'type' => false, 'style' => ' zorg' ).
     *
     * @author Schplurtz le Déboulonné
     * @param $str String The string to tokenize
     * @return array The associative array of tokens
     */
    protected function _parseparams( $str ) {
        if($this->getConf('debug'))
            msg('_parseparams calling _tokenize("'.hsc($str).'")', 1);
        $toks=$this->_tokenize($str);
        $n=count($toks) ;
        $values=array( 'prompt' => false, 'continue' => false, 'comment' => false,
                       'type' => false, 'style' => '', );

        // check tokens by triplet.
        for( $i = 0; $i < $n - 2; ++$i ) {
            if( $toks[$i + 1] === '=' ) {
                $key=$this->_map($toks[$i]);
                if($key) {
                    if( $values[$key] !== false) {
                        msg( 'In <cli ...>, value «'.hsc($toks[$i+2]).'» override previously defined '.hsc($key).' «'. hsc($values[$key]).'».', 2, '', '', MSG_USERS_ONLY );
                    }
                    $values[$key]=$toks[$i+2];
                }
                else {
                    msg( 'Error, unknown attribute «' . hsc($toks[$i]) . '» in <cli> parametre', -1, '', '', MSG_USERS_ONLY );
                }
                // skip the '=' and the value. The loop skips the name.
                $i += 2;
            }
            else {
                // if not format X = Y, add current token to style.
                $values['style'].=' '.$toks[$i];
            }
        }
        // add 1 or 2 remaining tokens to style
        for( ; $i < $n; ++$i ) {
            $values['style'].=' '.$toks[$i];
        }
        return $values;
    }

    /**
     * check <cli param names and maps them to canonical values.
     *
     * @author Schplurtz le Déboulonné <schplurtz@laposte.net>
     * @param $s String attribute name as written in the <cli> tag
     * @return Mixed canonical attribute name or false if attr is unknown
     *               One of 'type', 'prompt', 'continue', 'comment'.
     */
    protected function _map( $s ) {
        static $canonical=array(
            'lang'     => 'type',
            'language' => 'type',
            'type'     => 'type',
            't'        => 'type',
            'l'        => 'type',
            'lng'      => 'type',
            'prompt'   => 'prompt',
            'continue' => 'continue',
            'cont'     => 'continue',
            'comment'  => 'comment',
        );
        return isset($canonical[$s]) ? $canonical[$s] : false;
    }
}