1<?php
2/**
3 * DokuWiki Plugin cli (Syntax Component)
4 *
5 * @license      GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author       Schplurtz le Déboulonné <schplurtz@laposte.net>
7 * @author       Chris P. Jobling <C.P.Jobling@Swansea.ac.uk>
8 * @author       Stephane Chazelas <stephane.chazelas@emerson.com>
9 * @author       Andy Webber <dokuwiki@andywebber.com>
10 */
11
// Abort immediately when this file is loaded outside of DokuWiki
// (i.e. when the DOKU_INC constant has not been defined).
if (defined('DOKU_INC') === false) {
    die();
}
14
15class syntax_plugin_cli extends DokuWiki_Syntax_Plugin {
16
17    const PROMPT=0;
18    const CONT=1;
19    const COMMENT=2;
20    const TYPE=3;
21    const STYLE=4;
22    // prompt, continue and comment stack
23    protected $stack;
24    protected $namedpcc=array();
25    protected $init=false;
26    protected $genhtml='';
27
28    function __construct() {
29        // Delay init until we actually need to parse some <cli>
30        return;
31    }
32
33    /**
34     * @return string Syntax mode type
35     */
36    public function getType() {
37        return 'protected';
38    }
39    /**
40     * What about paragraph ?
41     *
42     * Because we want to nest without having an open paragraph when an inner
43     * cli is closed, we lie. We will close and open paragraph ourselves.
44     *
45     * @return string Paragraph type
46     */
47    public function getPType() {
48        return 'normal';
49    }
50    /**
51     * @return int Sort order - Low numbers go before high numbers
52     */
53    public function getSort() {
54        return 601;
55    }
56
57    /**
58     * delaied initialization.
59     *
60     * @return null
61     */
62    protected function _init() {
63        if( $this->init ) return;
64        // DokuWiki always loads and instanciates the plugin.
65        // We don't want to load all this when the class is
66        // loaded. Only when the syntax is really met and there
67        // is need to parse should we do all this. It's not
68        // even needed to render() a conversation.
69
70        // hardcoded defaults
71        $this->stack=array(array('/^.{0,30}[$%>#](?:$|\\s)/', '/^.{0,30}>(?:$|\\s)/', '/(?:^#)|\\s#/', '', ''));
72        // override defaults with user config if exists.
73        if(''!=($s=$this->getConf('prompt')))
74            $this->stack[0][self::PROMPT]=$this->_toregexp($s);
75        if(''!=($s=$this->getConf('continue')))
76            $this->stack[0][self::CONT]=$this->_toregexp($s);
77        if(''!=($s=$this->getConf('comment')))
78            $this->stack[0][self::COMMENT]=$this->_toregexp($s, 1);
79        $this->_parsenamedparam($this->getConf('namedprompt'), self::PROMPT);
80        $this->_parsenamedparam($this->getConf('namedcontinue'), self::CONT);
81        $this->_parsenamedparam($this->getConf('namedcomment'), self::COMMENT);
82        $this->init = true;
83    }
84    /**
85     * override default accepts() method to allow nesting
86     *
87     * ie, to get the plugin accepts its own entry syntax
88     */
89    function accepts($mode) {
90        if ($mode == substr(get_class($this), 7)) return true;
91        return parent::accepts($mode);
92    }
93    /**
94     * Connect lookup pattern to lexer.
95     *
96     * @author       Stephane Chazelas <stephane.chazelas@emerson.com>
97     * @author       Schplurtz le Déboulonné <schplurtz@laposte.net>
98     * @param string $mode Parser mode
99     */
100    public function connectTo($mode) {
101        // by the way, '<cli.*? >\r?\n?(?=.*?</cli>)' is the worst idea ever.
102        $this->Lexer->addEntryPattern('<cli(?:[)]?' .
103            '"(?:\\\\.|[^\\\\"])*"' .     /* double-quoted string */
104            '|\'(?:\\\\.|[^\'\\\\])*\'' . /* single-quoted string */
105            '|\\\\.' .                    /* escaped character */
106            '|[^\'"\\\\>]|[(?:])*>\r?\n?'.
107            '(?=.*?</cli>)'
108            ,$mode,'plugin_cli');
109
110            /*
111             * The [)]? and |[(?:] is to work around a bug in lexer.php
112             * wrt nested (...)
113             */
114    }
115
116    /**
117     * Connect exit pattern to lexer.
118     *
119     * @author       Stephane Chazelas <stephane.chazelas@emerson.com>
120     */
121    function postConnect() {
122        $this->Lexer->addExitPattern('\r?\n?</cli>','plugin_cli');
123    }
124
125    /**
126     * Handle matches of the cli syntax
127     *
128     * @author Schplurtz le Déboulonné <Schplurtz@laposte.net>
129     * @param string          $match   The match of the syntax
130     * @param int             $state   The state of the handler
131     * @param int             $pos     The position in the document
132     * @param Doku_Handler    $handler The handler
133     * @return mixed[] array of "lines". a "line" is a String or String[3]
134     */
135    public function handle($match, $state, $pos, Doku_Handler $handler){
136        switch ($state) {
137        case DOKU_LEXER_ENTER :
138            $this->_init();
139            $level=count($this->stack) - 1;
140            $args = substr(rtrim($match), 4, -1); // strip '<cli' and '>EOL?'
141            $params=$this->_parseparams($args);
142            $type=$params['type'];
143            $style=$params['style'];
144            $this->current=array();
145            // nested cli that only define style inherit prompts
146            if( $level && !empty($style) && ! $type && ! $params['prompt']  && ! $params['continue']  && ! $params['comment'] ) {
147                $last=end($this->stack);
148                $this->current[self::PROMPT]=$last[self::PROMPT];
149                $this->current[self::CONT]=$last[self::CONT];
150                $this->current[self::COMMENT]=$last[self::COMMENT];
151                $this->current[self::TYPE]=$last[self::TYPE];
152            }
153            else {
154                $this->current[self::PROMPT]=($params['prompt']) ?
155                    $this->_toregexp($params['prompt'])
156                    : (($type && ($t=$this->namedpcc[$type][self::PROMPT])) ?
157                        $t
158                        : $this->stack[0][self::PROMPT]
159                    );
160                $this->current[self::CONT]=($params['continue']) ?
161                    $this->_toregexp($params['continue'])
162                    : (($type && ($t=$this->namedpcc[$type][self::CONT])) ?
163                        $t
164                        : $this->stack[0][self::CONT]
165                    );
166                $this->current[self::COMMENT]=($params['comment']) ?
167                    $this->_toregexp($params['comment'],1)
168                    : (($type && ($t=$this->namedpcc[$type][self::COMMENT])) ?
169                        $t
170                        : $this->stack[0][self::COMMENT]
171                    );
172                $this->current[self::TYPE]=$type;
173            }
174            $this->current[self::STYLE]=$style;
175            $this->stack[]=$this->current;
176            // return nesting level and type and style
177            return array($state, count($this->stack) - 2, $type, $style);
178        case DOKU_LEXER_UNMATCHED :
179            // return parsed conversation and type and style
180            $top=end($this->stack);
181            return array( $state, $this->_parse_conversation($match), $top[self::TYPE], $top[self::STYLE] );
182        case DOKU_LEXER_EXIT :
183            $top=array_pop($this->stack);
184            $this->current=end($this->stack);
185            // return same nested level as DOKU_LEXER_ENTER and type and style
186            return array($state, count($this->stack) -1, $top[self::TYPE], $top[self::STYLE] );
187        }
188        return array(); //not reached
189    }
190    /**
191     * analyze the conversation.
192     *
193     * The conversation is split in lines and analyzed line by
194     * line. If no prompt can be recognised on a line, then that
195     * line is obviously a computer output and it is kept as it
196     * is. Otherwise, the line is further split into (prompt,
197     * input, comment) triplet. Input and comment may be empty.
198     *
199     * @author Schplurtz le Déboulonné <Schplurtz@laposte.net>
200     * @author Andy Webber <dokuwiki@andywebber.com>
201     * @param  $txt     String   potentially multiline string
202     * @return mixed[]           array of String or Array
203     */
204    protected function _parse_conversation($txt) {
205        $res=array();
206        $main_prompt=$this->current[self::PROMPT];
207        $cont_prompt=$this->current[self::CONT];
208        $lines = preg_split('/\n\r|\n|\r/',$txt);
209        // skip first and last line if they are empty
210        if ( trim($lines[0]) == '' ) unset( $lines[0] );
211        if ( trim(end($lines)) == '' ) array_pop($lines);
212        // continuation lines can only appear after a main-prompt line or continuation-line
213        // but NOT as the first prompt. IE not after a line where there was no prompt.
214        $prompt_continue=false;
215        $parsed_lines=array();
216        foreach($lines as $line) {
217            if ($prompt_continue && preg_match($cont_prompt, $line, $matches)) {
218                $parsed_lines[]=$this->_parseline( $line, $matches[0] );
219                continue;
220            }
221            $prompt_continue=false;
222            if (preg_match($main_prompt, $line, $matches)) {
223                $prompt_continue=true;
224                $parsed_lines[]=$this->_parseline( $line, $matches[0] );
225                continue;
226            }
227            $parsed_lines[]=$line;
228        }
229        return $parsed_lines;
230    }
231    /**
232     * split line in (prompt, command, comment) triplet.
233     *
234     * @author Schplurtz le Déboulonné <Schplurtz@laposte.net>
235     * @param  $line    String   the original line
236     * @param  $prompt  String   The current recognised prompt
237     * @return String[]          the 3 components of the line : prompt, command, comment
238     */
239    protected function _parseline( $line, $prompt ) {
240        $comment='';
241        $index=strlen($prompt);
242        $comcom = substr( $line, $index );
243        $ar=preg_split($this->current[self::COMMENT], $comcom, 2, PREG_SPLIT_DELIM_CAPTURE);
244        if( isset($ar[1]) ) {
245            $comment=$ar[1].end($ar);
246        }
247        $ret=array( $prompt, $ar[0], $comment );
248        return $ret;
249    }
250
251    /**
252     * Render output. step by step generate html.
253     * When generation is complete, check mode : if mode is xhtml, then
254     * adds generate text to document. If mode is odt, then call odt renderer
255     * that will convert html to odt.
256     *
257     * @param string         $mode      Renderer mode (supported modes: xhtml)
258     * @param Doku_Renderer  $renderer  The renderer
259     * @param array          $data      The data from the handler() function
260     * @return bool If rendering was successful.
261     */
262    public function render($mode, Doku_Renderer $renderer, $data) {
263        if($mode !== 'xhtml' && $mode !== 'odt' && $mode !== 'odt_pdf') {
264            return false;
265        }
266        list($state, $thing, $type, $style) = $data;
267        switch ($state) {
268        case DOKU_LEXER_ENTER :
269            // $thing is nesting level here.
270            // for outer <cli>, initialize string.
271            // for nested <cli>, add a div.
272            if( 0 == $thing ) {
273                $this->genhtml = '';
274            }
275            else {
276                $this->genhtml .= "<div class='$type $style'>";
277                if( $mode != 'xhtml' ) // odt needs an additional CR. bug ?
278                     $this->genhtml .= DOKU_LF;
279            }
280        break;
281        case DOKU_LEXER_UNMATCHED :
282            // Here $thing is an array of parsed lines as returned by _parseline
283            $not_first_line=false;
284            foreach( $thing as $line ) {
285                if($not_first_line)
286                    $this->genhtml .= DOKU_LF;
287                else
288                    $not_first_line=true;
289                if(is_array($line)) {
290                    $this->genhtml .= '<span class="cli_prompt">' . hsc($line[0]) . "</span>";
291                    if( '' != $line[1] )
292                        $this->genhtml .= '<span class="cli_command">' . hsc($line[1]) . "</span>";
293                    if( '' != $line[2] )
294                        $this->genhtml .= '<span class="cli_comment">' . hsc($line[2]) . "</span>";
295                } else {
296                    $this->genhtml .= '<span class="cli_output">' . hsc($line) . "</span>";
297                }
298            }
299        break;
300        case DOKU_LEXER_EXIT :
301            // $thing is nesting level here.
302            // only close <pre> if we're closing the outermost <cli>
303            if( 0 === $thing ) {
304                if( $mode == 'xhtml' ) {
305                    $renderer->doc .= "</p><pre class='cli $type $style'>";
306                    $renderer->doc .= $this->genhtml;
307                    $renderer->doc .= '</pre><p>';
308                }
309                else {
310                    if(!$renderer->styleExists('Command Line Interface')) {
311                        $style=array(//FIXME: list of style porperties is in lib/plugins/odt/ODT/styles/ODTParagraphStyle.php
312                                     'style-name' => 'Command Line Interface',
313                                     'style-display-name' => 'Command Line Interface',
314                                     'background-color' => $this->getConf('odtbackground'),
315                                     'border' => $this->getConf('odtborderwidth').' solid '.
316                                                 $this->getConf('odtbordercolor'),
317                                    );
318                        $renderer->createParagraphStyle( $style );
319                    }
320                    $options=array();
321                    // see https://github.com/LarsGit223/dokuwiki-plugin-odt/commit/19f42d58f1d97758a2ccbac38aae7253826eb59a
322                    $options ['escape_content'] = 'false';
323                    $options ['space'] = 'preserve';
324                    $options ['media_selector'] = 'screen';
325                    $options ['p_style'] = 'Command Line Interface';
326                    $options ['element'] = 'pre';
327                    $renderer->generateODTfromHTMLCode($this->genhtml, $options);
328                }
329            }
330            else // closing inner <cli>
331                $this->genhtml .= '</div>';
332                if( $mode != 'xhtml' ) // odt needs an additional CR. bug ?
333                     $this->genhtml .= DOKU_LF;
334        break;
335        }
336    }
337
338    /**
339     * parse named prompts or comments from config
340     *
341     * @author Schplurtz le Déboulonné <Schplurtz@laposte.net>
342     * @param  $s             String        The configuration value
343     * @param  $kind          Int           One of self::PROMPT, CONT, COMMENT
344     * @return void
345     */
346    protected function _parsenamedparam($s, $kind) {
347        foreach(preg_split('/\n\r|\n|\r/',$s) as $line){
348            if(''==$line)
349                continue;
350            list($nom,$val)=explode(':', $line, 2);
351            $this->namedpcc[$nom][$kind]=$this->_toregexp($val, $kind == self::COMMENT);
352        }
353    }
354
355    /**
356     * transform a string or regexp into a regexp.
357     *
358     * The string is to match either a prompt
359     * or a comment, and is thus anchored accordingly.
360     *
361     * @author Schplurtz le Déboulonné <Schplurtz@laposte.net>
362     * @param  $s             String        The string to transform
363     * @param  $is_comment_re Boolean       true if the re is going to match a comment.
364     * @return String                       The regexp.
365     */
366    function _toregexp( $s, $is_comment_re=false ) {
367        if(preg_match('/^([\/|=,;%@#]).+(\1)$/', $s)) {
368            if( $is_comment_re )
369                $s = $s[0] . '(' . substr( $s, 1, -1 ) . ')' . $s[0];
370            return $s;
371        }
372        $r= $is_comment_re? '/(' : '/^.*?';
373        foreach( str_split( $s ) as $c )
374            $r .= ('\\' == $c || $c == '/') ? "[\\$c]" :  "[$c]";
375        $r .= $is_comment_re? ')/' : '/';
376        return $r;
377    }
378
379    /**
380     * tokenize a string. Could'nt find the correct set of regexps, So I use
381     * a DFA.
382     *
383     * recognize bare word, =, \-escaped char, and single or double quoted strings.
384     * ie «a\ b="foo\"bar"» produces "a b", '=', 'foo"bar'.
385     * This function implements the following DFA. See dot(1) if you
386     * need to visualize it. Or trust me and run something like that :
387     * sed -ne '/digr''aph/,/}/s/ *.//p' syntax.php | dot -Tpng >/tmp/graph.png
388     * digraph {
389     *   node [shape=circle];
390     *   0 -> 0 [label="\\s"]
391     *   0 -> 1 [label="\""]
392     *   0 -> 3 [label="'"]
393     *   0 -> 6 [label="\\ [+]"]
394     *   0 -> 7 [label="= [+]"]
395     *   0 -> 5 [label=". [+]"]
396     *
397     *   1 -> 2 [label="\\ [+]"]
398     *   1 -> 0 [label="\" [A]"]
399     *   1 -> 1 [label=". [+]"]
400     *   2 -> 1 [label="[\"\\] [-]"]
401     *   2 -> 8 [label=". [+]"]
402     *   8 -> 2 [label="\\ [+]"]
403     *   8 -> 0 [label="\" [A]"]
404     *   8 -> 1 [label=". [+]"]
405     *
406     *   3 -> 4 [label="\\ [+]"]
407     *   3 -> 0 [label="' [A]"]
408     *   3 -> 3 [label=". [+]"]
409     *   4 -> 3 [label="['\\] [-]"]
410     *   4 -> 9 [label=". [+]"]
411     *   9 -> 4 [label="\\ [+]"]
412     *   9 -> 0 [label="' [A]"]
413     *   9 -> 3 [label=". [+]"]
414     *
415     *   5 -> 6 [label="\\ [+]"]
416     *   5 -> 0 [label="\\s [A]"]
417     *   5 -> 7 [label="= [A+]"]
418     *   5 -> 1 [label="\" [A]"]
419     *   5 -> 3 [label="' [A]"]
420     *   5 -> 5 [label=". [+]"]
421     *
422     *   6 -> 5 [label="[\"' =\\>] [-]"]
423     *   6 -> 5 [label=". [+]"]
424     *
425     *   7 -> 0 [label="\\s [A]"]
426     *   7 -> 1 [label="\" [A]"]
427     *   7 -> 3 [label="' [A]"]
428     *   7 -> 6 [label="\\ [A+]"]
429     *   7 -> 5 [label=". [A+]"]
430     *   e [shape=box,label="arc label : current char [actions]\n+: add current char to token\n-: replace last char in token with current char\nA: Accept cur token. New token\nInitial state: 0\nValid end states : 0, 5, 7"]
431     * }
432     *
433     * @author Schplurtz le Déboulonné
434     * @param $str String The string to tokenize
435     * @return String[] An array of tokens
436     */
437    protected function _tokenize( $str ) {
438        $trs=array( //array ( current chart > new state,... )
439            0 => array( ' ' => 0, "\t" => 0, '"' => 1, "'" => 3, '\\' => 6, '=' => 7, 'def' => 5 ),
440            1 => array( '\\' => 2, '"'  => 0, 'def' => 1 ),
441            2 => array( 'def' => 1 ),
442            3 => array( '\\' => 4, "'"  => 0, 'def' => 3 ),
443            4 => array( 'def' => 3 ),
444            5 => array( '\\' => 6, ' '  => 0, "\t"  => 0, '=' => 7, '"' => 1, "'" => 3, 'def' => 5),
445            6 => array( 'def' => 5 ),
446            7 => array( ' ' => 0, "\t" => 0, '"' => 1, "'" => 3, "\\" => 6, 'def' => 5),
447        );
448        $acs=array( // new state => action OR new state => array ( char => action, char => action... )
449            0 => array( 6 => '+', 7 => '+', 5 => '+',),
450            1 => array( 2 => '+', 0 => 'A', 1 => '+',),
451            2 => array( 1 => array( '"' => '-', "\\" => '-', 'def' => '+')),
452            3 => array( 4 => '+', 0 => 'A', 3 => '+',),
453            4 => array( 3 => array( "'" => '-', "\\" => '-', 'def' => '+',)),
454            5 => array( 6 => '+', 0 => 'A', 7 => 'A+', 1 => 'A', 3 => 'A', 5 => '+',),
455            6 => array( 5 => array( '"' => '-', "'" => '-', ' ' => '-', '=' => '-', "\\" => '-', '>' => '-', 'def' =>'+'),),
456            7 => array( 0 => 'A', 1 => 'A', 3 => 'A', 6 => 'A+', 5 => 'A+',),
457        );
458
459        $toks=array();
460        $tok='';
461        $state=0;
462        foreach( str_split($str) as $c ) {
463            $to = array_key_exists($c, $trs[$state]) ? $trs[$state][$c] : $trs[$state]['def'];
464            if( array_key_exists($to, $acs[$state]) ) {
465                $action=$acs[$state][$to];
466                if(is_array($action)) {
467                  $action = array_key_exists($c, $action) ? $action[$c] : $action['def'];
468                }
469                switch($action) {
470                case '+'  : $tok .= $c; break;
471                case '-'  : $tok = substr($tok, 0, -1).$c; break;
472                case 'A'  : $toks[] = $tok; $tok=''; break;
473                case 'A+' : $toks[] = $tok; $tok=$c; break;
474                }
475            }
476            $state=$to;
477        }
478        if($tok != '') {
479            if ($state == 0 || $state == 5 || $state == 7)
480                $toks[] = $tok;
481            else
482                msg( 'In &lt;cli ...>, ignored malformed text «'.hsc($tok).'».', 2, '', '', MSG_USERS_ONLY );
483        }
484
485        if($this->getConf('debug'))
486          msg( "line <pre>«".hsc($str)."»</pre>parsed as :<pre>".hsc(print_r($toks,1))."</pre>" );
487        return $toks;
488    }
489
490    /**
491     *
492     * parse params of "<cli param...>" line
493     *
494     * param is expected to be a blank separated list of foo[=bar]
495     * statement. When there is no =bar part, then t=foo is assumed.
496     * The last non assigment statement will overwrite all the others.
497     * For example, for  «a=b c = "d" zorg», the returned
498     * array will be ( 'a' => 'b', 'c' => 'd', 't' => 'zorg' ).
499     *
500     * @author Schplurtz le Déboulonné
501     * @param $str String The string to tokenize
502     * @return array The associative array of tokens
503     */
504    protected function _parseparams( $str ) {
505        if($this->getConf('debug'))
506          msg('_parseparams calling _tokenize("'.hsc($str).'")', 1);
507        $toks=$this->_tokenize($str);
508        $n=count($toks) ;
509        $values=array( 'prompt' => false, 'continue' => false, 'comment' => false,
510                       'type' => false, 'style' => '', );
511
512        // check tokens by triplet.
513        for( $i = 0; $i < $n - 2; ++$i ) {
514            if( $toks[$i + 1] === '=' ) {
515                $key=$this->_map($toks[$i]);
516                if($key) {
517                    if( $values[$key] !== false) {
518                        msg( 'In &lt;cli ...>, value «'.hsc($toks[$i+2]).'» override previously defined '.hsc($key).' «'. hsc($values[$key]).'».', 2, '', '', MSG_USERS_ONLY );
519                    }
520                    $values[$key]=$toks[$i+2];
521                }
522                else {
523                    msg( 'Error, unknown attribute «' . hsc($toks[$i]) . '» in &lt;cli> parametre', -1, '', '', MSG_USERS_ONLY );
524                }
525                $i += 2;
526            }
527            else {
528                // if not format X = Y, add current token to style.
529                $values['style'].=' '.$toks[$i];
530            }
531        }
532        // add 1 or 2 remaining tokens to style
533        for( ; $i < $n; ++$i ) {
534            $values['style'].=' '.$toks[$i];
535        }
536        return $values;
537    }
538
539    /**
540     * check <cli param names and maps them to canonical values.
541     *
542     * @author       Schplurtz le Déboulonné <schplurtz@laposte.net>
543     * @return  Mixed    canonical attribute name or false if attr is unknown
544     *                   One of 'type', 'prompt', 'continue', 'comment'.
545     */
546    protected function _map( $s ) {
547        if( $s == 'lang' || $s == 'language' || $s == 'type' || $s == 't' || $s == 'l' || $s == 'lng' )
548            return 'type';
549        if( $s == 'prompt' )
550            return 'prompt';
551        if( $s == 'continue' || $s == 'cont' )
552            return 'continue';
553        if( $s == 'comment' )
554            return 'comment';
555        return false;
556    }
557}
558