<?php
if (! class_exists('syntax_plugin_code')) {
    if (! defined('DOKU_PLUGIN')) {
        if (! defined('DOKU_INC')) {
            define('DOKU_INC',
                realpath(dirname(__FILE__) . '/../../') . '/');
        } // if
        define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
    } // if
    // Include parent class:
    require_once(DOKU_PLUGIN . 'syntax.php');
    // We're dealing with "GeSHi" here, hence include it:
    require_once(DOKU_INC . 'inc/geshi.php');

/**
 * <tt>syntax_plugin_code.php </tt>- A PHP4 class that implements the
 * <tt>DokuWiki</tt> plugin for <tt>highlighting</tt> code fragments.
 *
 * <p>
 * Usage:<br>
 * <tt>&lt;code [language startno |[fh] text |[hs]]&gt;...&lt;/code&gt;</tt>
 * </p><p>
 * Maintenance note: literals that contain HTML entities or non-breakable
 * spaces are written with PHP escape sequences (<tt>"\x26..."</tt>,
 * <tt>"\xC2\xA0"</tt>) so that they cannot be silently altered by tools
 * which decode entities or normalise whitespace in the source text.
 * </p><pre>
 *      Copyright (C) 2006, 2008  M.Watermann, D-10247 Berlin, FRG
 *              All rights reserved
 *          EMail : &lt;support@mwat.de&gt;
 * </pre><div class="disclaimer">
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either
 * <a href="http://www.gnu.org/licenses/gpl.html">version 3</a> of the
 * License, or (at your option) any later version.<br>
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 * </div>
 * @author <a href="mailto:support@mwat.de">Matthias Watermann</a>
 * @version <tt>$Id: syntax_plugin_code.php,v 1.29 2008/07/22 09:22:47 matthias Exp $</tt>
 * @since created 24-Dec-2006
 */
class syntax_plugin_code extends DokuWiki_Syntax_Plugin {

    /**
     * @privatesection
     */
    //@{

    /**
     * Additional markup used with older DokuWiki installations.
     *
     * <p>
     * <tt>FALSE</tt> until <tt>_fixJS()</tt> has run once, <tt>TRUE</tt>
     * afterwards.
     * </p>
     * @private
     * @see _fixJS()
     */
    var $_JSmarkup = FALSE;

    /**
     * Indentation "text" used by <tt>_addLines()</tt>.
     *
     * <p>
     * The entry at index <tt>k</tt> holds <tt>k</tt> UTF-8 non-breakable
     * spaces (bytes <tt>0xC2 0xA0</tt>); it is used to right-align line
     * numbers that are <tt>k</tt> digits shorter than the widest one.
     * </p>
     * @private
     * @see _addLines()
     */
    var $_lead = array('',
        "\xC2\xA0",
        "\xC2\xA0\xC2\xA0",
        "\xC2\xA0\xC2\xA0\xC2\xA0",
        "\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0",
        "\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0",
        "\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0",
        "\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0");

    /**
     * Section counter for ODT export
     *
     * @private
     * @see render()
     * @since created 08-Jun-2008
     */
    var $_odtSect = 0;

    /**
     * Prepare the markup to render the DIFF text.
     *
     * <p>
     * The text is expected to start with a LF (see <tt>handle()</tt>)
     * and to have its HTML special characters already replaced by
     * <tt>_entities()</tt>; the "simple" format relies on the numeric
     * entities produced there.
     * </p>
     * @param $aText String The DIFF text to markup.
     * @param $aFormat String The DIFF format used ("u", "c", "n", "s");
     * any other value appends the text unchanged.
     * @param $aDoc String Reference to the current renderer's
     * <tt>doc</tt> property.
     * @return Boolean <tt>TRUE</tt>.
     * @private
     * @see render()
     */
    function _addDiff(&$aText, &$aFormat, &$aDoc) {
        // Since we're inside a PRE block we need the leading LFs:
        $ADD = "\n" . '<span class="diff_addedline">';
        $DEL = "\n" . '<span class="diff_deletedline">';
        $HEAD = "\n" . '<span class="diff_blockheader">';
        $CLOSE = '</span>';
        // Common headers for all formats;
        // the RegEx needs at least ")#" appended!
        $DiffHead = '#\n((?:diff\s[^\n]*)|(?:Index:\s[^\n]*)|(?:={60,})'
            . '|(?:RCS file:\s[^\n]*)|(?:retrieving revision [0-9][^\n]*)';
        switch ($aFormat) {
            case 'u':   // unified output
                $aDoc .= preg_replace(
                    array($DiffHead . '|(?:@@[^\n]*))#',
                        '|\n(\+[^\n]*)|',
                        '|\n(\-[^\n]*)|'),
                    array($HEAD . '\1' . $CLOSE,
                        $ADD . '\1' . $CLOSE,
                        $DEL . '\1' . $CLOSE),
                    $aText);
                return TRUE;
            case 'c':   // context output
                // Mark the common headers, then split at the "*****"
                // hunk separators (which are kept as list entries):
                $sections = preg_split('|(\n\*{5,})|',
                    preg_replace($DiffHead . ')#',
                        $HEAD . '\1' . $CLOSE,
                        $aText),
                    -1, PREG_SPLIT_DELIM_CAPTURE);
                // Everything before the first hunk: file name lines
                $sections[0] = preg_replace(
                    array('|\n(\-{3}[^\n]*)|',
                        '|\n(\*{3}[^\n]*)|'),
                    array($ADD . '\1' . $CLOSE,
                        $DEL . '\1' . $CLOSE),
                    $sections[0]);
                $c = count($sections);
                for ($i = 1; $c > $i; ++$i) {
                    $hits = array();
                    if (preg_match('|^\n(\*{5,})|',
                    $sections[$i], $hits)) {
                        // the hunk separator itself
                        $sections[$i] = $HEAD . $hits[1] . $CLOSE;
                    } else if (preg_match('|^\n(\x2A{3}\s[^\n]*)(.*)|s',
                    $sections[$i], $hits)) {
                        $parts = preg_split('|\n(\-{3}\s[^\n]*)|',
                            $hits[2], -1, PREG_SPLIT_DELIM_CAPTURE);
                        // $parts[0] == OLD code
                        $parts[0] = preg_replace('|\n([!\-][^\n]*)|',
                            $DEL . '\1' . $CLOSE, $parts[0]);
                        // The remaining parts only exist if the hunk
                        // contains a "--- n,m ----" line; a truncated
                        // hunk must not produce empty markup.
                        if (isset($parts[1])) {
                            // $parts[1] == head of NEW code
                            $parts[1] = $ADD . $parts[1] . $CLOSE;
                        } // if
                        if (isset($parts[2])) {
                            // $parts[2] == NEW code
                            $parts[2] = preg_replace(
                                array('|\n([!\x2B][^\n]*)|',
                                    '|\n(\x2A{3}[^\n]*)|'),
                                array($ADD . '\1' . $CLOSE,
                                    $DEL . '\1' . $CLOSE),
                                $parts[2]);
                        } // if
                        if (isset($parts[3])) {
                            // TRUE when handling multi-file patches
                            $parts[3] = preg_replace('|^(\x2D{3}[^\n]*)|',
                                $ADD . '\1' . $CLOSE, $parts[3]);
                        } // if
                        $sections[$i] = $DEL . $hits[1] . $CLOSE
                            . implode('', $parts);
                    } // if
                    // ELSE: leave $sections[$i] as is
                } // for
                $aDoc .= implode('', $sections);
                return TRUE;
            case 'n':   // RCS output
                // Only added lines are there so we highlight just the
                // diff indicators while leaving the text alone.
                $aDoc .= preg_replace(
                    array($DiffHead . ')#',
                        '|\n(d[0-9]+\s+[0-9]+)|',
                        '|\n(a[0-9]+\s+[0-9]+)|'),
                    array($HEAD . '\1' . $CLOSE,
                        $DEL . '\1' . $CLOSE,
                        $ADD . '\1' . $CLOSE),
                    $aText);
                return TRUE;
            case 's':   // simple output
                // "\x26#60;" and "\x26#62;" are the numeric entities
                // for "<" and ">" as generated by _entities().
                $aDoc .= preg_replace(
                    array($DiffHead
                        . '|((?:[0-9a-z]+(?:,[0-9a-z]+)*)(?:[^\n]*)))#',
                        '|\n(\x26#60;[^\n]*)|',
                        '|\n(\x26#62;[^\n]*)|'),
                    array($HEAD . '\1' . $CLOSE,
                        $DEL . '\1' . $CLOSE,
                        $ADD . '\1' . $CLOSE),
                    $aText);
                return TRUE;
            default:    // unknown diff format
                $aDoc .= $aText;    // just append any unrecognized text
                return TRUE;
        } // switch
    } // _addDiff()

    /**
     * Add the lines of the given <tt>$aList</tt> to the specified
     * <tt>$aDoc</tt> beginning with the given <tt>$aStart</tt> linenumber.
     *
     * <p>
     * Every line is prefixed by <tt>&lt;span class="lno"&gt;</tt> markup
     * holding the right-aligned line number followed by a colon.
     * </p>
     * @param $aList Array [IN] the list of lines as prepared by
     * <tt>render()</tt>, [OUT] <tt>FALSE</tt>.
     * @param $aStart Integer The first linenumber to use.
     * @param $aDoc String Reference to the current renderer's
     * <tt>doc</tt> property.
     * @private
     * @see render()
     */
    function _addLines(&$aList, $aStart, &$aDoc) {
        // Since we're dealing with monospaced fonts here the width of each
        // character (space, NBSP, digit) is the same. Hence the length of
        // a digits string gives us its width i.e. the number of digits.
        $greatest = $aStart + count($aList);   // greatest line number
        $width = strlen("$greatest");          // width of greatest number
        // Lines regarded as "empty": nothing at all or a lone blank.
        // NOTE(review): the raw NBSP and the "nbsp" entity forms are
        // included because the original single-character literal could
        // not be told apart from a plain space - confirm against GeSHi's
        // markup for empty lines.
        $blanks = array('', ' ', "\xC2\xA0", "\x26nbsp;");
        // "foreach" replaces the "each()" loop (removed in PHP 8).
        foreach ($aList as $line) {
            $pad = $width - strlen("$aStart");
            $lead = (isset($this->_lead[$pad]))
                ? $this->_lead[$pad]
                : str_repeat("\xC2\xA0", max(0, $pad));
            // Strict test: a line consisting of "0" is real content.
            $aDoc .= '<span class="lno">' . $lead . "$aStart:</span>"
                . ((in_array($line, $blanks, TRUE))
                    ? "\n"
                    : " $line\n");
            ++$aStart;  // increment line number
        } // foreach
        $aList = FALSE; // release memory
    } // _addLines()

    /**
     * Internal convenience method to replace HTML special characters.
     *
     * <p>
     * The characters <tt>&amp;</tt>, <tt>&lt;</tt> and <tt>&gt;</tt> are
     * replaced by their <em>numeric</em> entities (#38, #60, #62) which
     * is the form the "simple" DIFF patterns of <tt>_addDiff()</tt>
     * look for.
     * </p>
     * @param $aString String [IN] The text to handle;
     * [OUT] the modified text (i.e. the method's result).
     * @return String The string with HTML special chars replaced.
     * @private
     * @since created 05-Feb-2007
     */
    function &_entities(&$aString) {
        // The ampersand has to be the first one to get replaced.
        $aString = str_replace(array('&', '<', '>'),
            array("\x26#38;", "\x26#60;", "\x26#62;"), $aString);
        return $aString;
    } // _entities()

    /**
     * Try to fix some markup error of the GeSHi SHELL highlighting.
     *
     * <p>
     * The GeShi highlighting for type "sh" (i.e. "bash") is, well,
     * seriously flawed (at least up to version 1.0.7.20 i.e. 2007-07-01).
     * Especially handling of comments and embedded string as well as
     * keyword is plain wrong.
     * </p><p>
     * This internal helper method tries to solve some minor problems by
     * removing highlight markup embedded in comment markup.
     * This is, however, by no means a final resolution: GeSHi obviously
     * keeps a kind of internal state resulting in highlighting markup
     * spanning (i.e. repeated on) several lines.
     * Which - if that state is wrong - causes great damage not by
     * corrupting the data but by confusing the reader with wrong markup.
     * The easiest way to trigger such a line spanning confusion is to use
     * solitary doublequotes or singlequotes (apostrophe) in a comment
     * line ...
     * </p>
     * @param $aMarkup String [IN] The highlight markup as returned by GeSHi;
     * [OUT] <tt>FALSE</tt>.
     * @param $aDoc String Reference to the current renderer's
     * <tt>doc</tt> property.
     * @private
     * @since created 04-Aug-2007
     * @see render()
     */
    function _fixGeSHi_Bash(&$aMarkup, &$aDoc) {
        // "version_compare()" also copes with three-part version numbers
        // (e.g. "1.0.8") which a four-part pattern would not recognise.
        if (defined('GESHI_VERSION')
        && version_compare(GESHI_VERSION, '1.0.7.20', '>')) {
            // GeSHi v1.0.7.21 has the comments bug fixed
            $aDoc .= $aMarkup;
            $aMarkup = FALSE;   // release memory
            return;
        } // if
        $lines = explode("\n", $aMarkup);
        $aMarkup = FALSE;   // release memory
        // "foreach" replaces the "each()" loop (removed in PHP 8).
        foreach ($lines as $i => $l) {
            $hits = array();
            // GeSHi "bash" module marks up comments with CSS class "re3":
            if (preg_match('|^((.*)<span class="re3">)(.*)$|i', $l, $hits)) {
                if ('#!/bin/' == substr($hits[3], 0, 7)) {
                    // "shebang" line: drop the comment markup altogether
                    $lines[$i] = $hits[2] . strip_tags($hits[3]);
                } else {
                    // keep the comment span but nothing nested in it
                    $lines[$i] = $hits[1] . strip_tags($hits[3]) . '</span>';
                } // if
            } else if (! preg_match('|^\s*<span|i', $l)) {
                // If a line doesn't start with a highlighted keyword
                // all tags are removed since they're most probably
                // "leftovers" from the GeSHI string/comment bug.
                $lines[$i] = strip_tags($l);
            } // if
        } // foreach
        $aDoc .= implode("\n", $lines);
    } // _fixGeSHi_Bash()

    /**
     * Add markup to load JavaScript file with older DokuWiki versions.
     *
     * <p>
     * If the plugin's <tt>style.css</tt> and/or <tt>script.js</tt> exist
     * their markup is prepended to the renderer's document (once).
     * </p>
     * @param $aRenderer Object The renderer used.
     * @private
     * @since created 19-Feb-2007
     * @see render()
     */
    function _fixJS(&$aRenderer) {
        //XXX This test will break if the DokuWiki file gets renamed:
        if (@file_exists(DOKU_INC . 'lib/exe/js.php')) {
            // Assuming a fairly recent DokuWiki installation
            // handling the plugin files on its own there's
            // nothing to do here ...
            return;
        } // if
        if ($this->_JSmarkup) {
            // Markup already added (or not needed)
            return;
        } // if
        $localdir = realpath(dirname(__FILE__)) . '/';
        $webdir = DOKU_BASE . 'lib/plugins/code/';
        $css = '';
        if (file_exists($localdir . 'style.css')) {
            ob_start();
            @include($localdir . 'style.css');
            // Remove whitespace from CSS and expand IMG paths;
            // patterns without a replacement entry are replaced by
            // the empty string:
            if ($css = preg_replace(
                array('|\s*/\x2A.*?\x2A/\s*|s', '|\s*([:;\{\},+!])\s*|',
                    '|(?:url\x28\s*)([^/])|', '|^\s*|', '|\s*$|'),
                array(' ', '\1', 'url(' . $webdir . '\1'),
                ob_get_contents())) {
                $css = '<style type="text/css">' . $css . '</style>';
            } // if
            ob_end_clean();
        } // if
        $js = (file_exists($localdir . 'script.js'))
            ? '<script type="text/javascript" src="'
                . $webdir . 'script.js"></script>'
            : '';
        if ($this->_JSmarkup = $css . $js) {
            $aRenderer->doc = $this->_JSmarkup
                . preg_replace('|\s*<p>\s*</p>\s*|', '', $aRenderer->doc);
        //ELSE: Neither CSS nor JS files found.
        } // if
        // Set member field to skip tests with next call:
        $this->_JSmarkup = TRUE;
    } // _fixJS()

    /**
     * RegEx callback to markup spaces in ODT mode.
     *
     * @param $aList Array A list of RegEx matches; index <tt>[1]</tt>
     * holds the run of spaces to replace.
     * @return String The ODT/XML markup for the run of spaces.
     * @private
     * @since created 07-Jun-2008
     * @see render()
     */
    function _preserveSpaces($aList) {
        return ($len = strlen($aList[1]))
            ? '<text:s text:c="' . $len . '"/>'
            : ' ';
    } // _preserveSpaces()

    /**
     * Add the lines of the given <tt>$aText</tt> to the specified
     * <tt>$aDoc</tt> beginning with the given <tt>$aStart</tt> linenumber.
     *
     * @param $aText String [IN] the text lines as prepared by
     * <tt>handle()</tt>, [OUT] <tt>FALSE</tt>.
     * @param $aStart Integer The first linenumber to use;
     * if <tt>0</tt> (zero) no linenumbers are used.
     * @param $aDoc String Reference to the current renderer's
     * <tt>doc</tt> property.
     * @param $aClass String The CSS class name for the <tt>PRE</tt> tag.
     * @param $addTags Boolean Used in "ODT" mode to suppress the
     * enclosing <tt>PRE</tt> tags.
     * @private
     * @since created 03-Feb-2007
     * @see render()
     */
    function _rawMarkup(&$aText, $aStart, &$aDoc, $aClass, $addTags = TRUE) {
        if ($addTags) {
            $aDoc .= '<pre class="' . $aClass . '">' . "\n";
        } // if
        if ($aStart) {
            // Split the prepared data into a list of lines:
            $aText = explode("\n", $aText);
            // Add the numbered lines to the document:
            $this->_addLines($aText, $aStart, $aDoc);
        } else {
            $aDoc .= $aText;
        } // if
        if ($addTags) {
            $aDoc .= '</pre>';
        } // if
        $aText = FALSE; // release memory
    } // _rawMarkup()

    /**
     * RegEx callback to replace SPAN tags in ODT mode.
     *
     * @param $aList Array A list of RegEx matches; index <tt>[3]</tt>
     * (if present) holds the CSS class name of the SPAN tag.
     * @return String The ODT/XML opening tag replacing the SPAN tag.
     * @private
     * @since created 07-Jun-2008
     * @see render()
     */
    function _replaceSpan($aList) {
        // Index 3 is missing for SPAN tags without "class" attribute.
        return ((isset($aList[3])) && ($aList[3]))
            ? '<text:span text:style-name="Code_5f_'
                . str_replace('_', '_5f_', $aList[3]) . '">'
            : '<text:span>';
    } // _replaceSpan()

    //@}
    /**
     * @publicsection
     */
    //@{

    /**
     * Tell the parser whether the plugin accepts syntax mode
     * <tt>$aMode</tt> within its own markup.
     *
     * @param $aMode String The requested syntaxmode.
     * @return Boolean <tt>FALSE</tt> (no nested markup allowed).
     * @public
     * @see getAllowedTypes()
     */
    function accepts($aMode) {
        return FALSE;
    } // accepts()

    /**
     * Connect lookup pattern to lexer.
     *
     * @param $aMode String The desired rendermode.
     * @public
     * @see render()
     */
    function connectTo($aMode) {
        // look-ahead to minimize the chance of false matches:
        $this->Lexer->addEntryPattern(
            '\x3Ccode(?=[^>]*\x3E\r?\n.*\n\x3C\x2Fcode\x3E)',
            $aMode, 'plugin_code');
    } // connectTo()

    /**
     * Get an array of mode types that may be nested within the
     * plugin's own markup.
     *
     * @return Array Allowed nested types (none).
     * @public
     * @see accepts()
     * @static
     */
    function getAllowedTypes() {
        return array();
    } // getAllowedTypes()

    /**
     * Get an associative array with plugin info.
     *
     * <p>
     * The returned array holds the following fields:
     * <dl>
     * <dt>author</dt><dd>Author of the plugin</dd>
     * <dt>email</dt><dd>Email address to contact the author</dd>
     * <dt>date</dt><dd>Last modified date of the plugin in
     * <tt>YYYY-MM-DD</tt> format</dd>
     * <dt>name</dt><dd>Name of the plugin</dd>
     * <dt>desc</dt><dd>Short description of the plugin (Text only)</dd>
     * <dt>url</dt><dd>Website with more information on the plugin
     * (eg. syntax description)</dd>
     * </dl>
     * @return Array Information about this plugin class.
     * @public
     * @static
     */
    function getInfo() {
        $c = 'code';    // hack to hide "desc" field from GeShi
        return array(
            'author' => 'Matthias Watermann',
            'email' => 'support@mwat.de',
            'date' => '2008-07-22',
            'name' => 'Code Syntax Plugin',
            'desc' => 'Syntax highlighting with line numbering <'
                . $c . ' lang 1 |[fh] text |[hs]> ... </' . $c . '>',
            'url' => 'http://wiki.splitbrain.org/plugin:code2');
    } // getInfo()

    /**
     * Define how this plugin is handled regarding paragraphs.
     *
     * <p>
     * This method is important for correct XHTML nesting.
     * It returns one of the following values:
     * </p><dl>
     * <dt>normal</dt><dd>The plugin can be used inside paragraphs.</dd>
     * <dt>block</dt><dd>Open paragraphs need to be closed before
     * plugin output.</dd>
     * <dt>stack</dt><dd>Special case: Plugin wraps other paragraphs.</dd>
     * </dl>
     * @return String <tt>"block"</tt>.
     * @public
     * @static
     */
    function getPType() {
        return 'block';
    } // getPType()

    /**
     * Where to sort in?
     *
     * @return Integer <tt>194</tt> (below "Doku_Parser_Mode_code").
     * @public
     * @static
     */
    function getSort() {
        // class "Doku_Parser_Mode_code" returns 200
        return 194;
    } // getSort()

    /**
     * Get the type of syntax this plugin defines.
     *
     * @return String <tt>"protected"</tt>.
     * @public
     * @static
     */
    function getType() {
        return 'protected';
    } // getType()

    /**
     * Handler to prepare matched data for the rendering process.
     *
     * <p>
     * The <tt>$aState</tt> parameter gives the type of pattern
     * which triggered the call to this method:
     * </p><dl>
     * <dt>DOKU_LEXER_UNMATCHED</dt>
     * <dd>ordinary text encountered within the plugin's syntax mode
     * which doesn't match any pattern.</dd>
     * </dl><p>
     * If the text to highlight consists of <tt>extern&gt; URL</tt> the
     * given URL (schemes "ftp", "http" and "https" only) is fetched and
     * its contents are used instead.
     * </p>
     * @param $aMatch String The text matched by the patterns.
     * @param $aState Integer The lexer state for the match.
     * @param $aPos Integer The character position of the matched text.
     * @param $aHandler Object Reference to the Doku_Handler object.
     * @return Array Index <tt>[0]</tt> holds the current <tt>$aState</tt>,
     * index <tt>[1]</tt> the embedded text to highlight,
     * index <tt>[2]</tt> the language/dialect (or <tt>FALSE</tt>),
     * index <tt>[3]</tt> the first line number (or <tt>0</tt>) or - for
     * language "diff" - the one-letter DIFF format,
     * index <tt>[4]</tt> the top title (or <tt>FALSE</tt>),
     * index <tt>[5]</tt> the bottom title (or <tt>FALSE</tt>),
     * index <tt>[6]</tt> hiding CSS flag (or <tt>""</tt>).
     * @public
     * @see render()
     * @static
     */
    function handle($aMatch, $aState, $aPos, &$aHandler) {
        if (DOKU_LEXER_UNMATCHED != $aState) {
            return array($aState);  // nothing to do for "render()"
        } // if
        $aMatch = explode('>', $aMatch, 2);
        // $aMatch[0] : lang etc.
        // $aMatch[1] : text to highlight
        if (! isset($aMatch[1])) {
            $aMatch[1] = '';    // no closing bracket: nothing to highlight
        } // if
        $n = explode('>', trim($aMatch[1]));
        $l = 'extern';  // external resource requested?
        // Check whether there's an external file to fetch:
        if ($l == $n[0]) {
            $n[1] = (isset($n[1])) ? trim($n[1]) : '';
            if ($n[1]) {
                $n[0] = @parse_url($n[1]);
                if (is_array($n[0]) && isset($n[0]['scheme'])
                && ($n[0] = $n[0]['scheme'])) {
                    // Don't accept unsecure schemes like
                    // "file", "javascript", "mailto" etc.
                    switch ($n[0]) {
                        case 'ftp':
                        case 'http':
                        case 'https':
                            // NOTE(review): the URL comes straight from
                            // the wiki page, i.e. any editor can make
                            // the server request arbitrary hosts (incl.
                            // internal ones). Consider a host whitelist.
                            //XXX This might fail due to global PHP setup:
                            if ($handle = @fopen($n[1], 'rb')) {
                                $aMatch[1] = '';
                                while (! @feof($handle)) {
                                    //XXX This might fail due to
                                    // memory constraints:
                                    $aMatch[1] .= @fread($handle, 0x8000);
                                } // while
                                @fclose($handle);
                            } else {
                                $aMatch = array($l,
                                    'Failed to retrieve: ' . $n[1]);
                            } // if
                            break;
                        default:
                            $aMatch = array($l,
                                'Unsupported URL scheme: ' . $n[0]);
                            break;
                    } // switch
                } else {
                    $aMatch = array($l, 'Invalid URL: ' . $n[1]);
                } // if
            } else {
                $aMatch = array($l, 'Missing URL: ' . $aMatch[1]);
            } // if
        } // if
        // Strip leading/trailing/EoL whitespace,
        // replace TABs by four spaces, the "nbsp" entity by a raw NBSP:
        $aMatch[1] = preg_replace(
            array('#(?>\r\n)|\r#', '|^\n\n*|',
                '|[\t ]+\n|', '|\s*\n$|'),
            array("\n", '', "\n", ''),
            str_replace("\x26nbsp;", "\xC2\xA0",
                str_replace("\t", '    ', $aMatch[1])));

        $css = '';          // default: no initial CSS content hiding
        $l = FALSE;         // default: no language
        $n = 0;             // default: no line numbers
        $ht = $ft = FALSE;  // default: no (head/foot) title
        $hits = array();    // RegEx matches from the tag attributes
        /*
            The free form of the RegEx to parse the arguments here is:
            /^
                # "eat" leading whitespace:
                \s*
                (?=\S)  # Look ahead: do not match empty lines. This is
                        # needed since all other expressions are optional.
                # Make sure, nothing is given away once it matched:
                (?>
                    # We need a separate branch for "diff" because it may be
                    # followed by a _letter_ (not digit) indicating the format.
                    (?>
                        (diff)
                        # match 1
                        (?>\s+([cnrsu]?))?
                        # match 2
                    )
                    |
                    # Branch for standard language highlighting
                    (?>
                        # extract language:
                        ([a-z][^\x7C\s]*)
                        # match 3
                        (?>
                            # extract starting line number:
                            \s+(\d\d*)
                            # match 4
                        )?
                    )
                    |
                    # Branch for line numbering only
                    (\d\d*)
                    # match 5
                    |
                    \s* # dummy needed to match "title only" markup (below)
                )
                # "eat" anything else up to the text delimiter:
                [^\x7C]*
                (?>
                    \x7C
                    # extract the position flag:
                    ([bfht])?\s*
                    # match 6
                    # extract the header,footer line:
                    ([^\x7C]+)
                    # match 7
                    (?>
                        # see whether there is a class flag:
                        \x7C\s*
                        (h|s)?.*
                        # match 8
                    )?
                )?
            # Anchored to make sure everything gets matched:
            $/xiu

            Since compiling and applying a free form RegEx slows down the
            overall matching process I've folded it all to a standard RegEx.
            Benchmarking during development gave me
            free form:  20480 loops, 552960 hits, 102400 fails, 12.994689 secs
            standard:   20480 loops, 552960 hits, 102400 fails, 8.357169 secs
        */
        if (preg_match('/^\s*(?=\S)(?>(?>(diff)(?>\s+([cnrsu]?))?)|'
        . '(?>([a-z][^\x7C\s]*)(?>\s+(\d\d*))?)|(\d\d*)|\s*)[^\x7C]*'
        . '(?>\x7C([bfht])?\s*([^\x7C]+)(?>\x7C\s*(h|s)?.*)?)?$/iu',
        $aMatch[0], $hits)) {
            unset($hits[0]);    // free mem
            // $hits[1] = "diff"
            // $hits[2] = type (of [1])
            // $hits[3] = LANG
            // $hits[4] = NUM (of [3])
            // $hits[5] = NUM (alone)
            // $hits[6] = Top/Bottom flag (of [7])
            // $hits[7] = TITLE
            // $hits[8] = s/h CSS flag
            if (isset($hits[3]) && ($hits[3])) {
                $l = strtolower($hits[3]);
                if (isset($hits[4]) && ($hits[4])) {
                    $n = (int)$hits[4];
                } // if
                $hits[3] = $hits[4] = FALSE;
            } else if (isset($hits[1]) && ($hits[1])) {
                $l = strtolower($hits[1]);
                // The appended "?" guarantees a non-empty string
                // i.e. "unknown format" if none was given:
                $hits[2] = (isset($hits[2]))
                    ? strtolower($hits[2]) . '?'
                    : '?';
                $n = $hits[2][0];
                $hits[1] = $hits[2] = FALSE;
            } else if (isset($hits[5]) && ($hits[5])) {
                $n = (int)$hits[5];
            } // if
            if (isset($hits[7]) && ($hits[7])) {
                // The appended "f" makes "footer" the default position:
                $hits[6] = (isset($hits[6]))
                    ? strtolower($hits[6]) . 'f'
                    : 'f';
                switch ($hits[6][0]) {
                    case 'h':
                    case 't':
                        $ht = trim($hits[7]);
                        break;
                    default:
                        $ft = trim($hits[7]);
                        break;
                } // switch
                if (isset($hits[8])) {
                    // The appended "s" makes "show" the default:
                    $hits[8] = strtolower($hits[8]) . 's';
                    if ('h' == $hits[8][0]) {
                        // This class is handled by JavaScript (there
                        // _must_not_ be any CSS rules for this):
                        $css = ' HideOnInit';
                    } // if
                } // if
                $hits[6] = $hits[7] = $hits[8] = FALSE;
            } // if
        // ELSE: no arguments given to CODE tag
        } // if
        switch ($l) {
            case 'console':
                // nothing additional to setup here
                break;
            case 'diff':
                // "substr()" is safe with an empty text as well:
                if ("\n" != substr($aMatch[1], 0, 1)) {
                    // A leading LF is needed to recognize and handle
                    // the very first line with all the REs used.
                    $aMatch[1] = "\n" . $aMatch[1];
                } // if
                switch ($n) {
                    case 'u':   // DIFF cmdline switch for "unified"
                    case 'c':   // DIFF cmdline switch for "context"
                    case 'n':   // DIFF cmdline switch for "RCS"
                    case 's':
                        // We believe the format hint ...
                        // (or should we be more suspicious?)
                        break;
                    case 'r':   // Mnemonic for "RCS"
                        $n = 'n';
                        break;
                    default:    // try to figure out the format actually used
                        if (preg_match(
                        '|\n(?:\x2A{5,}\n\x2A{3}\s[1-9]+.*?\x2A{4}\n.+?)+|s',
                        $aMatch[1])) {
                            $n = 'c';
                        } else if (preg_match(
                        '|\n@@\s\-[0-9]+,[0-9]+[ \+,0-9]+?@@\n.+\n|s',
                        $aMatch[1])) {
                            $n = 'u';
                        } else if (preg_match(
                        '|\n[ad][0-9]+\s+[0-9]+\r?\n|', $aMatch[1])) {
                            // We've to check this _before_ "simple" since
                            // the REs are quite similar (but this one is
                            // slightly more specific).
                            $n = 'n';
                        } else if (preg_match(
                        '|\n(?:[0-9a-z]+(?:,[0-9a-z]+)*)(?:[^\n]*\n.*?)+|',
                        $aMatch[1])) {
                            $n = 's';
                        } else {
                            $n = '?';
                        } // if
                        break;
                } // switch
                break;
            case 'htm':     // convenience shortcut
            case 'html':    // ditto
                $l = 'html4strict';
                break;
            case 'js':      // shortcut
                $l = 'javascript';
                break;
            case 'sh':      // shortcut
                $l = 'bash';
                break;
            default:
                if (! $l) {
                    // no language: simple PRE markup will get generated
                    $l = FALSE;
                } // if
                break;
        } // switch
        return array(DOKU_LEXER_UNMATCHED,
            $aMatch[1], $l, $n, $ht, $ft, $css);
    } // handle()

    /**
     * Add exit pattern to lexer.
     *
     * @public
     */
    function postConnect() {
        // look-before to minimize the chance of false matches:
        $this->Lexer->addExitPattern('(?<=\n)\x3C\x2Fcode\x3E',
            'plugin_code');
    } // postConnect()

    /**
     * Handle the actual output (markup) creation.
     *
     * <p>
     * The method checks the given <tt>$aFormat</tt> to decide how to
     * handle the specified <tt>$aData</tt>.
     * The standard case (i.e. <tt>"xhtml"</tt>) is handled completely
     * by this implementation, preparing linenumbers and/or head/foot
     * lines if requested.
     * For the <tt>"odt"</tt> format all plugin features (incl. linenumbers
     * and header/footer lines) are supported by generating the appropriate
     * ODT/XML markup.
     * All other formats are passed back to the given <tt>$aRenderer</tt>
     * instance for further handling.
     * </p><p>
     * <tt>$aRenderer</tt> contains a reference to the renderer object
     * which is currently in charge of the rendering.
     * The contents of the given <tt>$aData</tt> is the return value
     * of the <tt>handle()</tt> method; it is used as scratch space and
     * reset to <tt>array(FALSE)</tt> before the method returns.
     * </p>
     * @param $aFormat String The output format to generate.
     * @param $aRenderer Object A reference to the renderer object.
     * @param $aData Array The data created/returned by the
     * <tt>handle()</tt> method.
     * @return Boolean <tt>TRUE</tt>.
     * @public
     * @see handle()
     */
    function render($aFormat, &$aRenderer, &$aData) {
        if (DOKU_LEXER_UNMATCHED != $aData[0]) {
            return TRUE;
        } // if
        if ('xhtml' == $aFormat) {
            if ($tdiv = (($aData[4]) || ($aData[5]))) {
                $this->_fixJS($aRenderer);  // check for old DokuWiki versions
                $aRenderer->doc .= '<div class="code">';
                if ($aData[4]) {
                    //XXX Note that "_headerToLink()" is supposed to be a
                    // _private_ method of the renderer class; so this code
                    // will fail once DokuWiki is rewritten in PHP5 which
                    // implements encapsulation of private methods and
                    // properties:
                    $aRenderer->doc .= '<p class="codehead' . $aData[6]
                        . '"><a name="' . $aRenderer->_headerToLink($aData[4])
                        . '">' . $this->_entities($aData[4]) . '</a></p>';
                    $aData[4] = $aData[6] = FALSE;  // free mem
                } // if
            } // if
            if ($aData[2]) {    // lang was given
                if ('console' == $aData[2]) {
                    $this->_rawMarkup($this->_entities($aData[1]),
                        $aData[3], $aRenderer->doc, $aData[2]);
                } else if ('diff' == $aData[2]) {
                    $this->_entities($aData[1]);
                    $aRenderer->doc .= '<pre class="code diff">';
                    $this->_addDiff($aData[1], $aData[3], $aRenderer->doc);
                    $aRenderer->doc .= '</pre>';
                } else {
                    $isSH = ('bash' == $aData[2]);
                    $geshi = new GeSHi($aData[1], $aData[2], GESHI_LANG_ROOT);
                    if ($geshi->error()) {
                        // Language not supported by "GeSHi"
                        $geshi = NULL;  // release memory
                        $this->_rawMarkup($this->_entities($aData[1]),
                            $aData[3], $aRenderer->doc, 'code');
                    } else {
                        $aData[1] = FALSE;  // free mem
                        $geshi->enable_classes();
                        $geshi->set_encoding('utf-8');
                        $geshi->set_header_type(GESHI_HEADER_PRE);
                        $geshi->set_overall_class('code ' . $aData[2]);
                        global $conf;
                        if ($conf['target']['extern']) {
                            $geshi->set_link_target($conf['target']['extern']);
                        } // if
                        if ($aData[3]) {    // line numbers requested
                            // Separate PRE tag from parsed data:
                            $aData[1] = explode('>', $geshi->parse_code(), 2);
                            // [1][0] = leading "<pre"
                            // [1][1] = remaining markup up to trailing "</pre"
                            $geshi = NULL;  // release memory

                            // Add the open tag to the document:
                            $aRenderer->doc .= $aData[1][0] . '>';

                            // Separate trailing PRE tag:
                            $aData[1] = explode('</pre>', $aData[1][1], 2);
                            // [1][0] = GeSHi markup
                            // [1][1] = trailing "</pre"

                            if ($isSH) {
                                $aData[1][1] = '';
                                $this->_fixGeSHi_Bash($aData[1][0],
                                    $aData[1][1]);
                            } else {
                                // Set reference to fixed markup to sync with
                                // the "bash" execution path (above):
                                $aData[1][1] =& $aData[1][0];
                            } // if

                            // Split the parsed data into a list of lines:
                            $aData[2] = explode("\n", $aData[1][1]);
                            $aData[1] = FALSE;  // free mem

                            // Add the numbered lines to the document:
                            $this->_addLines($aData[2], $aData[3],
                                $aRenderer->doc);

                            // Close the preformatted section markup:
                            $aRenderer->doc .= '</pre>';
                        } else {    // w/o line numbering
                            if ($isSH) {
                                // Separate trailing PRE tag which
                                // sometimes is "forgotten" by GeSHi:
                                $aData[2] = explode('</pre>',
                                    $geshi->parse_code(), 2);
                                // [2][0] = GeSHi markup
                                // [2][1] = trailing "</pre" (if any)
                                $this->_fixGeSHi_Bash($aData[2][0],
                                    $aRenderer->doc);
                                $aRenderer->doc .= '</pre>';
                            } else {
                                $aRenderer->doc .= $geshi->parse_code();
                            } // if
                            $geshi = NULL;  // release memory
                        } // if
                    } // if
                } // if
            } else {
                $this->_rawMarkup($this->_entities($aData[1]),
                    $aData[3], $aRenderer->doc, 'code');
            } // if
            if ($tdiv) {
                if ($aData[5]) {
                    //XXX See "_headerToLink()" note above.
                    $aRenderer->doc .= '<p class="codefoot'
                        . $aData[6] . '"><a name="'
                        . $aRenderer->_headerToLink($aData[5]) . '">'
                        . $this->_entities($aData[5]) . '</a></p>';
                } // if
                $aRenderer->doc .= '</div>';
            } // if
        } else if ('odt' == $aFormat) {
            $inLI = array();
            if (preg_match('|^<text:p text:style-name="[^"]+">\s*</text:p>\s*(.*)$|si',
            $aRenderer->doc, $inLI)) {
                // remove leading whitespace
                $aRenderer->doc = $inLI[1];
            } // if
            // The "renderer_plugin_odt" doesn't clean (close)
            // its own tags before calling this plugin.
            // To work around that bug we have to check some
            // private properties of the renderer instance.
            $inLI = FALSE;
            if (is_a($aRenderer, 'renderer_plugin_odt')) {
                if ($inLI = ($aRenderer->in_list_item)) {
                    // If we're in a list item, we've to close the paragraph:
                    $aRenderer->doc .= '</text:p>';
                } // if
                if ($aRenderer->in_paragraph) {
                    $aRenderer->doc .= '</text:p>';
                    $aRenderer->in_paragraph = FALSE;
                } // if
            } // if

            // Init (open) our text section:
            $aRenderer->doc .= "\n"
                . '<text:section text:style-name="Code_5f_Section" text:name="CodeSnippet'
                . ++$this->_odtSect . '">';

            if ($tdiv = (($aData[4]) || ($aData[5]))) {
                // Check whether we need a top caption ("header"):
                if ($aData[4]) {
                    // The title is user input and must not be able
                    // to break the XML markup, hence "_entities()":
                    $aRenderer->doc .=
                        '<text:p text:style-name="Code_5f_Title">'
                        . "<text:line-break/>\n"
                        . $this->_entities($aData[4]) . "</text:p>\n";
                    $aData[4] = $aData[6] = FALSE;  // free mem
                } // if
            } // if
            // The following code resembles the "xhtml" processing
            // above except that we're not using "pre" tags here
            // but ODT/XML markup.
            $aData[0] = ''; // tmp. container of processed data
            if ($aData[2]) {    // lang was given
                if ('console' == $aData[2]) {
                    $this->_rawMarkup($this->_entities($aData[1]),
                        $aData[3], $aData[0], $aData[2], FALSE);
                } else if ('diff' == $aData[2]) {
                    $this->_addDiff($this->_entities($aData[1]),
                        $aData[3], $aData[0]);
                } else {
                    $isSH = ('bash' == $aData[2]);
                    $geshi = new GeSHi($aData[1], $aData[2], GESHI_LANG_ROOT);
                    if ($geshi->error()) {
                        // Language not supported by "GeSHi"
                        $geshi = NULL;  // release memory
                        $this->_rawMarkup($this->_entities($aData[1]),
                            $aData[3], $aData[0], '', FALSE);
                    } else {
                        $aData[1] = FALSE;  // free mem
                        $geshi->enable_classes();
                        $geshi->set_encoding('utf-8');
                        $geshi->set_header_type(GESHI_HEADER_PRE);
                        $geshi->set_overall_class('code ' . $aData[2]);
                        global $conf;
                        if ($conf['target']['extern']) {
                            $geshi->set_link_target($conf['target']['extern']);
                        } // if
                        // Separate PRE tag from parsed data:
                        $aData[1] = explode('>', $geshi->parse_code(), 2);
                        // [1][0] = leading "<pre"
                        // [1][1] = remaining markup up to trailing "</pre"
                        $geshi = NULL;  // release memory

                        // Separate trailing PRE tag:
                        $aData[1] = explode('</pre>', $aData[1][1], 2);
                        // [1][0] = GeSHi markup
                        // [1][1] = trailing "</pre"
                        $aData[1] = $aData[1][0];

                        if ($isSH) {    // work around GeSHI bug
                            $aData[2] = '';
                            $this->_fixGeSHi_Bash($aData[1], $aData[2]);
                        } else {
                            $aData[2] = $aData[1];
                        } // if
                        $aData[1] = FALSE;  // release memory

                        if ($aData[3]) {    // line numbers requested
                            // Split the parsed data into a list of lines:
                            $aData[1] = explode("\n", $aData[2]);
                            $aData[2] = FALSE;  // release memory

                            // Add the numbered lines to the document:
                            $this->_addLines($aData[1], $aData[3], $aData[0]);
                        } else {    // w/o line numbers
                            $aData[0] = $aData[2];
                            $aData[2] = FALSE;  // release memory
                        } // if
                    } // if
                } // if
            } else {
                $this->_rawMarkup($this->_entities($aData[1]),
                    $aData[3], $aData[0], '', FALSE);
            } // if

            if ('console' == $aData[2]) {
                $aRenderer->doc .=
                    '<text:p text:style-name="Code_5f_Console">';
            } else {
                $aRenderer->doc .=
                    '<text:p text:style-name="Code_5f_Standard">';
            } // if
            // Replace the HTML "span" tags (for highlighting) by
            // the appropriate ODT/XML markup.
            // For unknown reasons we need an additional space
            // in front of the very first line.
            // The callbacks are given as instance methods since calling
            // non-static methods statically fails with PHP 8.
            $aData[0] = '<text:s/>'
                . preg_replace_callback('|(<span( class="([^"]*)"[^>]*)?>)|',
                    array($this, '_replaceSpan'),
                    // OOo (v2.3) crashes on the "nbsp" entity, hence
                    // replace it by the raw UTF-8 character:
                    str_replace("\x26nbsp;", chr(194) . chr(160),
                        str_replace('</span>', '</text:span>',
                            strip_tags($aData[0], '<span>'))));
            // Now append our markup to the renderer's document;
            // LFs and runs of SPACEs are replaced by their respective
            // ODT/XML equivalents:
            $aRenderer->doc .= preg_replace_callback('|( {2,})|',
                array($this, '_preserveSpaces'),
                str_replace("\n", "<text:line-break/>\n", $aData[0]));
            $aData[0] = FALSE;  // release memory

            // Check whether we need a bottom caption ("footer"):
            if ($tdiv && ($aData[5])) {
                $aRenderer->doc .=
                    '</text:p><text:p text:style-name="Code_5f_Title">'
                    . $this->_entities($aData[5]);
            } // if
            // Close all our open tags:
            $aRenderer->doc .= "</text:p></text:section>\n";

            if ($inLI) {
                // Workaround (see above): (re-)open a paragraph:
                $aRenderer->doc .= '<text:p>';
            } // if
        } else {    // unsupported output format
            $aData[0] = $aData[4] = $aData[5] = FALSE;  // avoid recursion
            // Pass anything else back to the renderer instance
            // (which will - hopefully - know how to handle it):
            $aRenderer->code($aData[1], $aData[2]);
        } // if
        $aData = array(FALSE);  // don't process this text again
        return TRUE;
    } // render()

    //@}
} // class syntax_plugin_code
} // if