1<?php
2/**
3 * DokuWiki Plugin ExtTab3 (Syntax component)
4 *
5 * Allows extended (MediaWiki-style) tables inside DokuWiki
6 *
7 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
8 * @author     Satoshi Sahara <sahara.satoshi@gmail.com>
9 */
class syntax_plugin_exttab3 extends DokuWiki_Syntax_Plugin
{
    /** @var array stack of currently open tags - used by handle() */
    protected $stack = array();

    /** @var array|null map of tag name => array(prefix, postfix), built by setTagsmap() */
    protected $tagsmap;

    /** @var array|null whitelist of attribute names, built by setAllowedAttributes() */
    protected $attrsmap;

    /** @var string lexer mode name of this plugin, set by preConnect() */
    protected $mode;

    /**
     * Syntax Type
     *
     * @return string
     */
    public function getType()
    {
        return 'container';
    }

    /**
     * Allowed Mode Types (modes that may be nested inside the table)
     *
     * @return array
     */
    public function getAllowedTypes()
    {
        return array(
            'container',
            'formatting',
            'substition',
            'disabled',
            'protected',
            'paragraphs',
        );
    }

    /**
     * Paragraph Type
     *
     * @return string
     */
    public function getPType()
    {
        return 'block';
    }

    /**
     * Sort number used to determine priority of this mode
     *
     * @return int
     */
    public function getSort()
    {
        return 59; // = Doku_Parser_Mode_table-1
    }

    /**
     * Derive the lexer mode name from the class name
     */
    public function preConnect()
    {
        // drop 'syntax_' from class name
        $this->mode = substr(get_class($this), 7);
    }

    /**
     * Connect entry pattern to lexer
     *
     * @param string $mode        parser mode the entry pattern is registered in
     */
    public function connectTo($mode)
    {
        // table start:  {| attrs
        $this->Lexer->addEntryPattern('\n\{\|[^\n]*',$mode, $this->mode);
    }

    /**
     * Connect exit pattern and the in-table patterns to lexer
     */
    public function postConnect()
    {
        // table end:    |}
        $this->Lexer->addExitPattern('[ \t]*\n\|\}', $this->mode);

        // match pattern for attributes
        $attrs = '[^\n\{\|\!\[]+';

        // caption:      |+ attrs | caption
        $this->Lexer->addPattern("\n\|\+ *(?:$attrs\|(?!\|))?", $this->mode);
        // table row:    |- attrs
        $this->Lexer->addPattern(' *?\n\|\-+[^\n]*', $this->mode);
        // table header: ! attrs |
        $this->Lexer->addPattern("(?: *?\n|\!)\!(?:$attrs\|(?!\|))?", $this->mode);
        // table data:   | attrs |
        $this->Lexer->addPattern("(?: *?\n|\|)\|(?:$attrs\|(?!\|))?", $this->mode);
    }

    /**
     * The plugin accepts its own entry syntax, which allows nested tables
     *
     * @param string $mode        parser mode to be checked
     * @return bool
     */
    public function accepts($mode)
    {
        if ($mode == $this->mode) return true;
        return parent::accepts($mode);
    }


    /**
     * helper function to simplify writing plugin calls to the instruction list
     * first three arguments are passed to function render as $data
     */
    protected function writeCall($tag, $attr, $state, $pos, $match, $handler)
    {
        $data = array($state, $tag, $attr);
        $handler->addPluginCall($this->getPluginName(), $data, $state, $pos, $match);
    }

    /**
     * write an "open tag" instruction
     *
     * The instruction data is routed back through handle() using the
     * pseudo state 'addPluginCall', which returns the data unchanged.
     *
     * @param string $tag         tag name
     * @param string $attr        attributes of the tag
     * @param int    $pos         position of the match in the document
     * @param string $match       matched text (not stored in the instruction)
     * @param Doku_Handler $handler
     */
    protected function open($tag, $attr, $pos, $match, $handler)
    {
        $data = array(DOKU_LEXER_ENTER, $tag, $attr);
        $handler->plugin($data, 'addPluginCall', $pos, $this->getPluginName());
    }

    /**
     * write a "close tag" instruction
     *
     * @param string $tag         tag name
     * @param int    $pos         position of the match in the document
     * @param string $match       matched text (not stored in the instruction)
     * @param Doku_Handler $handler
     */
    protected function close($tag, $pos, $match, $handler)
    {
        // a closing tag carries no attributes
        $data = array(DOKU_LEXER_EXIT, $tag, '');
        $handler->plugin($data, 'addPluginCall', $pos, $this->getPluginName());
    }

    /**
     * pop and close open tags until the given tag has been closed
     *
     * Stops when the stack runs empty, so an unbalanced stack can not
     * cause an endless loop.
     *
     * @param string $tag         tag name that terminates the rewind
     * @param int    $pos         position of the match in the document
     * @param string $match       matched text
     * @param Doku_Handler $handler
     */
    private function closeUntil($tag, $pos, $match, $handler)
    {
        while (!empty($this->stack)) {
            $oldtag = array_pop($this->stack);
            $this->close($oldtag, $pos, $match, $handler);
            if ($oldtag === $tag) break;
        }
    }

    /**
     * helper function for exttab syntax translation to html
     *
     * @param string $match       matched string
     * @return array|false        tag name and attributes, false on unknown markup
     */
    protected function interpret($match = '')
    {
        $markup = ltrim($match);

        // two-character markups take precedence over one-character markups
        $markups = array(
            2 => array(
                '{|' => 'table',
                '|}' => '/table',
                '|+' => 'caption',
                '|-' => 'tr',
                '||' => 'td',
                '!!' => 'th',
            ),
            1 => array(
                '!' => 'th',
                '|' => 'td',
            ),
        );
        foreach ($markups as $len => $map) {
            $key = (string) substr($markup, 0, $len);
            if (isset($map[$key])) {
                return array($map[$key], (string) substr($markup, $len));
            }
        }

        msg($this->getPluginName().' ERROR: unknown syntax: '.hsc($markup) ,-1);
        return false;
    }

    /**
     * append specified class name to attributes
     *
     * Only a double-quoted class attribute (class="...") is recognized.
     *
     * @param string $class       class name
     * @param string $attr        attributes of html tag
     * @return string             modified $attr
     */
    private function appendClass($class, $attr)
    {
        $attr = (string) $attr;

        if (!preg_match('/\bclass="([^"]*)"/', $attr, $matches, PREG_OFFSET_CAPTURE)) {
            // class attribute is not specified
            return $attr.' class="'.$class.'"';
        }

        $items = preg_split('/\s+/', $matches[1][0], -1, PREG_SPLIT_NO_EMPTY);
        if (in_array($class, $items, true)) {
            // $class found in the class attribute
            return $attr;
        }

        // class attribute is specified, but does not include $class
        $items[] = $class;
        $replace = 'class="'.implode(' ', $items).'"';
        return substr_replace($attr, $replace, $matches[0][1], strlen($matches[0][0]));
    }


    /**
     * Handle the match
     *
     * Translates the lexer tokens into open/close instructions while
     * tracking the currently open tags in $this->stack.
     *
     * @param string|array $match  matched text, or instruction data when
     *                             $state is 'addPluginCall'
     * @param int|string   $state  lexer state, or 'addPluginCall'
     * @param int          $pos    position of the match in the document
     * @param Doku_Handler $handler
     * @return array|false         instruction data for 'addPluginCall', else false
     */
    public function handle($match, $state, $pos, Doku_Handler $handler)
    {
        if ($state === 'addPluginCall') {
            // write plugin instruction to call list of the handler
            // Note: $match is array, not matched text
            return $match;
        }

        switch ($state) {
            case DOKU_LEXER_ENTER:
                // table start
                $parsed = $this->interpret($match);
                if ($parsed === false) break;
                list($tag, $attr) = $parsed;
                // ensure that class attribute contains "exttable"
                $attr = $this->appendClass('exttable', $attr);
                array_push($this->stack, $tag);
                $this->open($tag, $attr, $pos, $match, $handler);
                break;

            case DOKU_LEXER_EXIT:
                // rewind table: close everything up to and including the table
                $this->closeUntil('table', $pos, $match, $handler);
                break;

            case DOKU_LEXER_MATCHED:
                $parsed = $this->interpret($match);
                if ($parsed === false) break;
                list($tag, $attr) = $parsed;
                $tag_prev = end($this->stack);
                switch ($tag_prev) {
                    case 'caption':
                        // any markup terminates the caption ...
                        $oldtag = array_pop($this->stack);
                        $this->close($oldtag, $pos, $match, $handler);
                        // ... then continue as if directly inside the table
                        // (intentional fallthrough)
                    case 'table':
                        switch ($tag) {
                            case 'caption':
                            case 'tr':
                                array_push($this->stack, $tag);
                                $this->open($tag, $attr, $pos, $match, $handler);
                                break;
                            case 'th':
                            case 'td':
                                // cell without explicit row: open the row first
                                array_push($this->stack, 'tr');
                                $this->open('tr', '', $pos, $match, $handler);
                                array_push($this->stack, $tag);
                                $this->open($tag, $attr, $pos, $match, $handler);
                                break;
                        }
                        break;
                    case 'tr':
                        switch ($tag) {
                            case 'caption':
                                msg($this->getPluginName().' Syntax ERROR: match='.hsc(trim($match)) ,-1);
                                break;
                            case 'tr':
                                // empty row: replace it with the new row
                                $oldtag = array_pop($this->stack);
                                $this->close($oldtag, $pos, $match, $handler);
                                array_push($this->stack, $tag);
                                $this->open($tag, $attr, $pos, $match, $handler);
                                break;
                            case 'th':
                            case 'td':
                                array_push($this->stack, $tag);
                                $this->open($tag, $attr, $pos, $match, $handler);
                                break;
                        }
                        break;
                    case 'th':
                    case 'td':
                        switch ($tag) {
                            case 'caption':
                                msg($this->getPluginName().' Syntax ERROR: match='.hsc(trim($match)) ,-1);
                                break;
                            case 'tr':
                                // rewind old row prior to start new row
                                $this->closeUntil('tr', $pos, $match, $handler);
                                array_push($this->stack, $tag);
                                $this->open($tag, $attr, $pos, $match, $handler);
                                break;
                            case 'th':
                            case 'td':
                                // close the previous cell, open the next one
                                $oldtag = array_pop($this->stack);
                                $this->close($oldtag, $pos, $match, $handler);
                                array_push($this->stack, $tag);
                                $this->open($tag, $attr, $pos, $match, $handler);
                                break;
                        }
                        break;
                }
                break;

            case DOKU_LEXER_UNMATCHED:
                $tag_prev = end($this->stack);
                switch ($tag_prev) {
                    case 'caption':
                        // cdata --- use base() instead of $this->writeCall()
                        $handler->base($match, $state, $pos);
                        break;
                    case 'table':
                        // text directly inside the table: open an implicit row
                        array_push($this->stack, 'tr');
                        $this->open('tr', '', $pos, $match, $handler);
                        // intentional fallthrough
                    case 'tr':
                        // text directly inside a row: open an implicit cell
                        array_push($this->stack, 'td');
                        $this->open('td', '', $pos, $match, $handler);
                        // intentional fallthrough
                    case 'th':
                    case 'td':
                        // cdata --- use base() instead of $this->writeCall()
                        $handler->base($match, $state, $pos);
                        break;
                }
                break;
        }
        return false;
    }


    /**
     * Create output
     *
     * @param string        $format    output format ('xhtml', 'odt' or 'odt_pdf')
     * @param Doku_Renderer $renderer
     * @param array         $data      instruction data: array(state, tag, attr)
     * @return bool                    true if the instruction was rendered
     */
    public function render($format, Doku_Renderer $renderer, $data)
    {
        if (empty($data)) return false;

        switch ($format) {
            case 'xhtml' :
                return $this->render_xhtml($renderer, $data);
            case 'odt'   :
            case 'odt_pdf':
                $odt = $this->loadHelper('exttab3_odt');
                // the helper component may be unavailable
                if (!$odt) return false;
                return $odt->render($renderer, $data);
            default:
                return false;
        }
    }

    /**
     * Create xhtml output
     *
     * @param Doku_Renderer $renderer
     * @param array         $data      instruction data: array(state, tag, attr)
     * @return bool                    always true
     */
    protected function render_xhtml(Doku_Renderer $renderer, $data)
    {
        // prepare class properties
        isset($this->tagsmap)  || $this->setTagsmap();
        isset($this->attrsmap) || $this->setAllowedAttributes();

        list($state, $tag, $attr) = $data;

        switch ($state) {
            case DOKU_LEXER_ENTER:    // open tag
                $renderer->doc.= $this->tag_open($tag, $attr);
                break;
            case DOKU_LEXER_MATCHED:  // defensive, shouldn't occur
            case DOKU_LEXER_UNMATCHED:
                $renderer->cdata($tag);
                break;
            case DOKU_LEXER_EXIT:     // close tag
                $renderer->doc.= $this->tag_close($tag);
                break;
        }
        return true;
    }

    /**
     * open a exttab tag, used in render_xhtml()
     *
     * @param  string $tag        'table','caption','tr','th' or 'td'
     * @param  string $attr       attributes of tag element
     * @return string             html used to open the tag
     */
    protected function tag_open($tag, $attr = null)
    {
        $before = $this->tagsmap[$tag][0];
        $after  = $this->tagsmap[$tag][1];
        $attr = $this->cleanAttrString($attr, $this->attrsmap);
        return $before.'<'.$tag.$attr.'>'.$after;
    }

    /**
     * close a exttab tag, used in render_xhtml()
     *
     * @param  string $tag        'table','caption','tr','th' or 'td'
     * @return string             html used to close the tag
     */
    protected function tag_close($tag)
    {
        $before = $this->tagsmap['/'.$tag][0];
        $after  = $this->tagsmap['/'.$tag][1];
        return $before.'</'.$tag.'>'.$after;
    }

    /**
     * prepare tagsmap used in tag_open() and tag_close()
     */
    protected function setTagsmap()
    {
        // define name, prefix and postfix of tags
        $this->tagsmap = array(
                'table'    => array("", "\n" ),     // table start  : {|
                '/table'   => array("", "\n"),      // table end    : |}
                'caption'  => array("", ""),        // caption      : |+
                '/caption' => array("", "\n"),
                'tr'       => array("", "\n"),      // table row    : |-
                '/tr'      => array("", "\n"),
                'th'       => array("", ""),        // table header : !
                '/th'      => array("", "\n"),
                'td'       => array("", ""),        // table data   : |
                '/td'      => array("", "\n"),
        );
    }

    /**
     * prepare attrsmap used in cleanAttrString()
     */
    protected function setAllowedAttributes()
    {
        // define allowable attributes for table tags
        $this->attrsmap = array(
            // html5 HTML Global Attributes
            'accesskey', 'class', 'contenteditable', 'contextmenu',
            'dir', 'draggable', 'dropzone', 'hidden', 'id', 'lang',
            'spellcheck', 'style', 'tabindex', 'title', 'translate',
            'xml:lang',
            // html5 table tag
            'border', 'sortable',
            // html5 th and td tag
            'abbr', 'colspan', 'headers', 'rowspan', 'scope', 'sorted',
            // deprecated in html5
            'align', 'valign', 'width', 'height', 'bgcolor', 'nowrap',
        );
    }


    /**
     * Make the attribute string safe to avoid XSS attacks.
     *
     * @author Ashish Myles <marcianx@gmail.com>
     *
     * @param  string $attr           attributes to be checked
     * @param  array  $allowed_keys   allowed attribute name map
     *                                ex: array('border','bgcolor');
     * @return string|null            cleaned attributes, each prefixed by a
     *                                space; null if $attr is null or holds
     *                                no parsable attribute
     *
     * WATCH OUT FOR
     * - event handlers (e.g. onclick="javascript:...", etc)
     * - CSS (e.g. background: url(javascript:...))
     * - closing the tag and opening a new one
     * WHAT IS DONE
     * - turn all whitespace into ' ' (to protect from removal)
     * - remove all non-printable characters and < and >
     * - parse and filter attributes using a whitelist
     * - styles with 'url' or 'import' in them are altogether removed
     * (I know this is brutally aggressive and doesn't allow
     * some safe stuff, but better safe than sorry.)
     * - values are html-escaped and always emitted in double quotes
     * NOTE: Attribute values MUST be in quotes now.
     */
    protected function cleanAttrString($attr = '', $allowed_keys = array())
    {
        if (is_null($attr)) return null;
        $allowed_keys = (array) $allowed_keys;

        // Keep spaces simple
        $attr = trim(preg_replace('/\s+/', ' ', $attr));
        // Remove non-printable characters and angle brackets
        $attr = preg_replace('/[<>[:^print:]]+/', '', $attr);
        // This regular expression parses the value of an attribute and
        // the quotation marks surrounding it.
        // It assumes that all quotes within the value itself must be escaped,
        // which is not technically true.
        // To keep the parsing simple (no look-ahead), the value must be in
        // quotes.
        $val = "([\"'`])(?:[^\\\\\"'`]|\\\\.)*\g{-1}";

        $nattr = preg_match_all("/(\w+)\s*=\s*($val)/", $attr, $matches, PREG_SET_ORDER);
        if (!$nattr) return null;

        $clean_attr = '';
        foreach ($matches as $m) {
            $attrname = strtolower($m[1]);
            $attrval  = $m[2];   // value including the surrounding quotes

            // allow only recognized attributes
            if (!in_array($attrname, $allowed_keys, true)) continue;

            // make sure that style attributes do not have a url in them
            if ($attrname == 'style' &&
                  (stristr($attrval, 'url') !== false ||
                  stristr($attrval, 'import') !== false)) {
                continue;
            }

            // HTML knows neither backtick delimiters nor backslash-escaped
            // quotes, so the raw value must not be emitted verbatim: it
            // could terminate the attribute early and smuggle in another
            // one (e.g. an event handler). Strip the delimiters, resolve
            // the escaped quotes and emit an html-escaped, double-quoted
            // value. Existing entities are kept (no double encoding).
            $value = (string) substr($attrval, 1, -1);
            $value = preg_replace('/\\\\(["\'`])/', '$1', $value);
            $value = htmlspecialchars($value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);

            $clean_attr.= ' '.$attrname.'="'.$value.'"';
        }
        return $clean_attr;
    }

}
459