<?php
/**
 * DokuWiki Plugin ExtTab3 (Syntax component)
 *
 * Allows extended (MediaWiki-style) tables inside DokuWiki
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author  Satoshi Sahara <sahara.satoshi@gmail.com>
 */
class syntax_plugin_exttab3 extends DokuWiki_Syntax_Plugin
{
    /** @var string[] stack of currently open tags - used by handle() method */
    protected $stack = array();

    /** @var array|null tag name => array(prefix, postfix), filled by setTagsmap() */
    protected $tagsmap;

    /** @var string[]|null whitelist of attribute names, filled by setAllowedAttributes() */
    protected $attrsmap;

    /** @var string lexer mode name of this plugin, set in preConnect() */
    protected $mode;

    /**
     * Syntax Type
     *
     * @return string
     */
    public function getType()
    {
        return 'container';
    }

    /**
     * Allowed Mode Types
     *
     * @return string[]
     */
    public function getAllowedTypes()
    {
        return array(
            'container',
            'formatting',
            'substition',
            'disabled',
            'protected',
            'paragraphs',
        );
    }

    /**
     * Paragraph Type
     *
     * @return string
     */
    public function getPType()
    {
        return 'block';
    }

    /**
     * Derive the lexer mode name from the class name
     */
    public function preConnect()
    {
        // drop 'syntax_' from class name
        $this->mode = substr(get_class($this), 7);
    }

    /**
     * Connect the entry pattern to the lexer
     *
     * @param string $mode  parser mode the entry pattern is registered in
     */
    public function connectTo($mode)
    {
        // table start:  {| attrs
        $this->Lexer->addEntryPattern('\n\{\|[^\n]*', $mode, $this->mode);
    }

    /**
     * Register the exit pattern and the inner patterns of the table mode
     */
    public function postConnect()
    {
        // table end:    |}
        $this->Lexer->addExitPattern('[ \t]*\n\|\}', $this->mode);

        // match pattern for attributes
        $attrs = '[^\n\{\|\!\[]+';

        // caption:      |+ attrs | caption
        $this->Lexer->addPattern("\n\|\+ *(?:$attrs\|(?!\|))?", $this->mode);
        // table row:    |- attrs
        $this->Lexer->addPattern(' *?\n\|\-+[^\n]*', $this->mode);
        // table header: ! attrs |
        $this->Lexer->addPattern("(?: *?\n|\!)\!(?:$attrs\|(?!\|))?", $this->mode);
        // table data:   | attrs |
        $this->Lexer->addPattern("(?: *?\n|\|)\|(?:$attrs\|(?!\|))?", $this->mode);
    }

    /**
     * The plugin accepts its own entry syntax (nested tables)
     *
     * @param string $mode
     * @return bool
     */
    public function accepts($mode)
    {
        if ($mode == $this->mode) return true;
        return parent::accepts($mode);
    }

    /**
     * Sort number used to determine priority of this mode
     *
     * @return int
     */
    public function getSort()
    {
        return 59; // = Doku_Parser_Mode_table-1
    }


    /**
     * helper function to simplify writing plugin calls to the instruction list
     * first three arguments are passed to function render as $data
     *
     * @param string       $tag
     * @param string       $attr
     * @param int          $state
     * @param int          $pos
     * @param string       $match
     * @param Doku_Handler $handler
     */
    protected function writeCall($tag, $attr, $state, $pos, $match, $handler)
    {
        $data = array($state, $tag, $attr);
        $handler->addPluginCall($this->getPluginName(), $data, $state, $pos, $match);
    }

    /**
     * Emit an "open tag" instruction through the handler
     *
     * The instruction array is routed back into handle() using the
     * pseudo state 'addPluginCall', which returns it unchanged.
     *
     * @param string       $tag
     * @param string       $attr
     * @param int          $pos
     * @param string       $match   unused; kept for signature compatibility
     * @param Doku_Handler $handler
     */
    protected function open($tag, $attr, $pos, $match, $handler)
    {
        $data = array(DOKU_LEXER_ENTER, $tag, $attr);
        $handler->plugin($data, 'addPluginCall', $pos, $this->getPluginName());
    }

    /**
     * Emit a "close tag" instruction through the handler
     *
     * @param string       $tag
     * @param int          $pos
     * @param string       $match   unused; kept for signature compatibility
     * @param Doku_Handler $handler
     */
    protected function close($tag, $pos, $match, $handler)
    {
        // a closing tag carries no attributes (previously an undefined $attr was used here)
        $data = array(DOKU_LEXER_EXIT, $tag, '');
        $handler->plugin($data, 'addPluginCall', $pos, $this->getPluginName());
    }

    /**
     * Pop and close open tags until $stopTag has been closed
     *
     * Stops as well when the stack runs empty, so that an unbalanced stack
     * can never cause an endless loop.
     *
     * @param string       $stopTag  last tag to close ('table' or 'tr')
     * @param int          $pos
     * @param string       $match
     * @param Doku_Handler $handler
     */
    protected function closeUntil($stopTag, $pos, $match, $handler)
    {
        while (($oldtag = array_pop($this->stack)) !== null) {
            $this->close($oldtag, $pos, $match, $handler);
            if ($oldtag === $stopTag) break;
        }
    }

    /**
     * helper function for exttab syntax translation to html
     *
     * @param string $match  matched string
     * @return array|false   tag name and attributes, false on unknown markup
     */
    protected function interpret($match = '')
    {
        $markup = ltrim($match);
        $len = 2;
        switch (substr($markup, 0, $len)) {
            case '{|': $tag = 'table';   break;
            case '|}': $tag = '/table';  break;
            case '|+': $tag = 'caption'; break;
            case '|-': $tag = 'tr';      break;
            case '||': $tag = 'td';      break;
            case '!!': $tag = 'th';      break;
            default:
                // fall back to the one-character markers
                $len = 1;
                switch (substr($markup, 0, $len)) {
                    case '!': $tag = 'th'; break;
                    case '|': $tag = 'td'; break;
                }
        }
        if (isset($tag)) {
            $attrs = substr($markup, $len);
            return array($tag, $attrs);
        } else {
            msg($this->getPluginName().' ERROR: unknown syntax: '.hsc($markup) ,-1);
            return false;
        }
    }

    /**
     * append specified class name to attributes
     *
     * Only a double-quoted class attribute (class="...") is recognized.
     *
     * @param string $class  class name
     * @param string $attr   attributes of html tag
     * @return string  modified $attr
     */
    private function appendClass($class, $attr)
    {
        if (!preg_match('/\bclass="([^"]*)"/', $attr, $matches)) {
            // class attribute is not specified
            return $attr.' class="'.$class.'"';
        }

        $items = preg_split('/\s+/', $matches[1], -1, PREG_SPLIT_NO_EMPTY);
        if (in_array($class, $items, true)) {
            // $class found in the class attribute
            return $attr;
        }

        // class attribute is specified, but does not include $class
        $items[] = $class;
        $replace = 'class="'.implode(' ', $items).'"';
        return str_replace($matches[0], $replace, $attr);
    }


    /**
     * Handle the match
     *
     * @param string|array $match    matched text, or the instruction array
     *                               when $state is 'addPluginCall'
     * @param int|string   $state    lexer state, or 'addPluginCall'
     * @param int          $pos
     * @param Doku_Handler $handler
     * @return array|false  instruction data for 'addPluginCall', false otherwise
     */
    public function handle($match, $state, $pos, Doku_Handler $handler)
    {
        switch ($state) {
            case 'addPluginCall':
                // write plugin instruction to call list of the handler
                // Note: $match is array, not matched text
                return $match;

            case DOKU_LEXER_ENTER:
                // table start
                list($tag, $attr) = $this->interpret($match);
                // ensure that class attribute contains "exttable"
                $attr = $this->appendClass('exttable', $attr);
                array_push($this->stack, $tag);
                $this->open($tag, $attr, $pos, $match, $handler);
                break;

            case DOKU_LEXER_EXIT:
                // rewind table
                $this->closeUntil('table', $pos, $match, $handler);
                break;

            case DOKU_LEXER_MATCHED:
                $tag_prev = end($this->stack);
                list($tag, $attr) = $this->interpret($match);
                switch ($tag_prev) {
                    case 'caption':
                        $oldtag = array_pop($this->stack);
                        $this->close($oldtag, $pos, $match, $handler);
                        // no break: after closing the caption, continue as inside 'table'
                    case 'table':
                        switch ($tag) {
                            case 'caption':
                            case 'tr':
                                array_push($this->stack, $tag);
                                $this->open($tag, $attr, $pos, $match, $handler);
                                break;
                            case 'th':
                            case 'td':
                                // a cell directly inside the table: open an implicit row first
                                array_push($this->stack, 'tr');
                                $this->open('tr', '', $pos, $match, $handler);
                                array_push($this->stack, $tag);
                                $this->open($tag, $attr, $pos, $match, $handler);
                                break;
                        }
                        break;
                    case 'tr':
                        switch ($tag) {
                            case 'caption':
                                msg($this->getPluginName().' Syntax ERROR: match='.hsc(trim($match)) ,-1);
                                break;
                            case 'tr':
                                $oldtag = array_pop($this->stack);
                                $this->close($oldtag, $pos, $match, $handler);
                                array_push($this->stack, $tag);
                                $this->open($tag, $attr, $pos, $match, $handler);
                                break;
                            case 'th':
                            case 'td':
                                array_push($this->stack, $tag);
                                $this->open($tag, $attr, $pos, $match, $handler);
                                break;
                        }
                        break;
                    case 'th':
                    case 'td':
                        switch ($tag) {
                            case 'caption':
                                msg($this->getPluginName().' Syntax ERROR: match='.hsc(trim($match)) ,-1);
                                break;
                            case 'tr':
                                // rewind old row prior to start new row
                                $this->closeUntil('tr', $pos, $match, $handler);
                                array_push($this->stack, $tag);
                                $this->open($tag, $attr, $pos, $match, $handler);
                                break;
                            case 'th':
                            case 'td':
                                $oldtag = array_pop($this->stack);
                                $this->close($oldtag, $pos, $match, $handler);
                                array_push($this->stack, $tag);
                                $this->open($tag, $attr, $pos, $match, $handler);
                                break;
                        }
                        break;
                }
                break;

            case DOKU_LEXER_UNMATCHED:
                $tag_prev = end($this->stack);
                switch ($tag_prev) {
                    case 'caption':
                        // cdata --- use base() instead of $this->writeCall()
                        $handler->base($match, $state, $pos);
                        break;
                    case 'table':
                        array_push($this->stack, 'tr');
                        $this->open('tr', '', $pos, $match, $handler);
                        // no break: text needs an implicit row AND an implicit cell
                    case 'tr':
                        array_push($this->stack, 'td');
                        $this->open('td', '', $pos, $match, $handler);
                        // no break: now inside a cell, emit the text
                    case 'th':
                    case 'td':
                        // cdata --- use base() instead of $this->writeCall()
                        $handler->base($match, $state, $pos);
                        break;
                }
                break;
        }
        return false;
    }


    /**
     * Create output
     *
     * @param string        $format    output format
     * @param Doku_Renderer $renderer
     * @param array         $data      array($state, $tag, $attr) from handle()
     * @return bool
     */
    public function render($format, Doku_Renderer $renderer, $data)
    {
        if (empty($data)) return false;

        switch ($format) {
            case 'xhtml' :
                return $this->render_xhtml($renderer, $data);
            case 'odt' :
            case 'odt_pdf':
                $odt = $this->loadHelper('exttab3_odt');
                // the helper may be unavailable; do not call a method on a non-object
                if (!$odt) return false;
                return $odt->render($renderer, $data);
            default:
                return false;
        }
    }

    /**
     * Render one instruction as XHTML
     *
     * @param Doku_Renderer $renderer
     * @param array         $data      array($state, $tag, $attr)
     * @return bool  always true
     */
    protected function render_xhtml(Doku_Renderer $renderer, $data)
    {
        // prepare class properties (built once per instance)
        isset($this->tagsmap)  || $this->setTagsmap();
        isset($this->attrsmap) || $this->setAllowedAttributes();

        list($state, $tag, $attr) = $data;

        switch ($state) {
            case DOKU_LEXER_ENTER:      // open tag
                $renderer->doc.= $this->tag_open($tag, $attr);
                break;
            case DOKU_LEXER_MATCHED:    // defensive, shouldn't occur
            case DOKU_LEXER_UNMATCHED:
                $renderer->cdata($tag);
                break;
            case DOKU_LEXER_EXIT:       // close tag
                $renderer->doc.= $this->tag_close($tag);
                break;
        }
        return true;
    }

    /**
     * open a exttab tag, used in render_xhtml()
     *
     * @param string $tag   'table','caption','tr','th' or 'td'
     * @param string $attr  attributes of tag element
     * @return string  html used to open the tag
     */
    protected function tag_open($tag, $attr = null)
    {
        $before = $this->tagsmap[$tag][0];
        $after  = $this->tagsmap[$tag][1];
        // cleanAttrString() may return null, which concatenates as an empty string
        $attr = $this->cleanAttrString($attr, $this->attrsmap);
        return $before.'<'.$tag.$attr.'>'.$after;
    }

    /**
     * close a exttab tag, used in render_xhtml()
     *
     * @param string $tag   'table','caption','tr','th' or 'td'
     * @return string  html used to close the tag
     */
    protected function tag_close($tag)
    {
        $before = $this->tagsmap['/'.$tag][0];
        $after  = $this->tagsmap['/'.$tag][1];
        return $before.'</'.$tag.'>'.$after;
    }

    /**
     * prepare tagsmap used in tag_open() and tag_close()
     */
    protected function setTagsmap()
    {
        // define name, prefix and postfix of tags
        $this->tagsmap = array(
            'table'    => array("", "\n"),   // table start  : {|
            '/table'   => array("", "\n"),   // table end    : |}
            'caption'  => array("", ""),     // caption      : |+
            '/caption' => array("", "\n"),
            'tr'       => array("", "\n"),   // table row    : |-
            '/tr'      => array("", "\n"),
            'th'       => array("", ""),     // table header : !
            '/th'      => array("", "\n"),
            'td'       => array("", ""),     // table data   : |
            '/td'      => array("", "\n"),
        );
    }

    /**
     * prepare attrsmap used in cleanAttrString()
     */
    protected function setAllowedAttributes()
    {
        // define allowable attributes for table tags
        $this->attrsmap = array(
            // html5 HTML Global Attributes
            'accesskey', 'class', 'contenteditable', 'contextmenu',
            'dir', 'draggable', 'dropzone', 'hidden', 'id', 'lang',
            'spellcheck', 'style', 'tabindex', 'title', 'translate',
            'xml:lang',
            // html5 table tag
            'border', 'sortable',
            // html5 th and td tag
            'abbr', 'colspan', 'headers', 'rowspan', 'scope', 'sorted',
            // deprecated in html5
            'align', 'valign', 'width', 'height', 'bgcolor', 'nowrap',
        );
    }


    /**
     * Make the attribute string safe to avoid XSS attacks.
     *
     * @author Ashish Myles <marcianx@gmail.com>
     *
     * @param string $attr          attributes to be checked
     * @param array  $allowed_keys  allowed attribute name map
     *                              ex: array('border','bgcolor');
     * @return string|null  cleaned attributes, each prefixed with a space;
     *                      null when $attr is null or holds no quoted attribute
     *
     * WATCH OUT FOR
     * - event handlers (e.g. onclick="javascript:...", etc)
     * - CSS (e.g. background: url(javascript:...))
     * - closing the tag and opening a new one
     * WHAT IS DONE
     * - turn all whitespace into ' ' (to protect from removal)
     * - remove all non-printable characters and < and >
     * - parse and filter attributes using a whitelist
     * - styles with 'url' or 'import' in them are altogether removed
     *   (I know this is brutally aggressive and doesn't allow
     *   some safe stuff, but better safe than sorry.)
     * NOTE: Attribute values MUST be in quotes now.
     */
    protected function cleanAttrString($attr, $allowed_keys)
    {
        if (is_null($attr)) return null;

        // Keep spaces simple
        $attr = trim(preg_replace('/\s+/', ' ', $attr));
        // Remove non-printable characters and angle brackets
        $attr = preg_replace('/[<>[:^print:]]+/', '', $attr);
        // This regular expression parses the value of an attribute and
        // the quotation marks surrounding it.
        // It assumes that all quotes within the value itself must be escaped,
        // which is not technically true.
        // To keep the parsing simple (no look-ahead), the value must be in
        // quotes.
        $val = "([\"'`])(?:[^\\\\\"'`]|\\\\.)*\g{-1}";

        $nattr = preg_match_all("/(\w+)\s*=\s*($val)/", $attr, $matches, PREG_SET_ORDER);
        if (!$nattr) return null;

        $clean_attr = '';
        foreach ($matches as $m) {
            $attrname = strtolower($m[1]);
            $attrval  = $m[2];   // value including its surrounding quotes

            // allow only recognized attributes
            if (!in_array($attrname, $allowed_keys, true)) continue;

            // make sure that style attributes do not have a url in them
            if ($attrname == 'style'
                && (stristr($attrval, 'url') !== false || stristr($attrval, 'import') !== false)
            ) {
                continue;
            }
            $clean_attr.= " $attrname=$attrval";
        }
        return $clean_attr;
    }

}