<?php
/**
 * GeSHi - Generic Syntax Highlighter
 *
 * The GeSHi class for Generic Syntax Highlighting. Please refer to the
 * documentation at http://qbnz.com/highlighter/documentation.php for more
 * information about how to use this class.
 *
 * For changes, release notes, TODOs etc, see the relevant files in the docs/
 * directory.
 *
 *   This file is part of GeSHi.
 *
 *  GeSHi is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  GeSHi is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GeSHi; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * @package    geshi
 * @subpackage core
 * @author     Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
 * @copyright  (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2014 Benny Baumann
 * @license    http://gnu.org/copyleft/gpl.html GNU GPL
 */

//
// GeSHi Constants
// You should use these constant names in your programs instead of
// their values - you never know when a value may change in a future
// version
//

/** The version of this GeSHi file */
define('GESHI_VERSION', '1.0.9.1');

// Define the root directory for the GeSHi code tree
if (!defined('GESHI_ROOT')) {
    /** The root directory for GeSHi */
    define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
}
/** The language file directory for GeSHi
    @access private */
define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);

// Define if GeSHi should be paranoid about security
if (!defined('GESHI_SECURITY_PARANOID')) {
    /** Tells GeSHi to be paranoid about security settings */
    define('GESHI_SECURITY_PARANOID', false);
}

// Line numbers - use with enable_line_numbers()
/** Use no line numbers when building the result */
define('GESHI_NO_LINE_NUMBERS', 0);
/** Use normal line numbers when building the result */
define('GESHI_NORMAL_LINE_NUMBERS', 1);
/** Use fancy line numbers when building the result */
define('GESHI_FANCY_LINE_NUMBERS', 2);

// Container HTML type
/** Use nothing to surround the source */
define('GESHI_HEADER_NONE', 0);
/** Use a "div" to surround the source */
define('GESHI_HEADER_DIV', 1);
/** Use a "pre" to surround the source */
define('GESHI_HEADER_PRE', 2);
/** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
define('GESHI_HEADER_PRE_VALID', 3);
/**
 * Use a "table" to surround the source:
 *
 *  <table>
 *    <thead><tr><td colspan="2">$header</td></tr></thead>
 *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
 *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
 *  </table>
 *
 * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
 * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
 */
define('GESHI_HEADER_PRE_TABLE', 4);

// Capitalisation constants
/** Leave keywords found as the case that they are */
define('GESHI_CAPS_NO_CHANGE', 0);
/** Uppercase keywords found */
define('GESHI_CAPS_UPPER', 1);
/** Lowercase keywords found */
define('GESHI_CAPS_LOWER', 2);

// Link style constants
/** Links in the source in the :link state */
define('GESHI_LINK', 0);
/** Links in the source in the :hover state */
define('GESHI_HOVER', 1);
/** Links in the source in the :active state */
define('GESHI_ACTIVE', 2);
/** Links in the source in the :visited state */
define('GESHI_VISITED', 3);

// Important string starter/finisher
// Note that if you change these, they should be as-is: i.e., don't
// write them as if they had been run through htmlentities()
/** The starter for important parts of the source */
define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
/** The ender for important parts of the source */
define('GESHI_END_IMPORTANT', '<END GeSHi>');

/**#@+
 *  @access private
 */
// When strict mode applies for a language
/** Strict mode never applies (this is the most common) */
define('GESHI_NEVER', 0);
/** Strict mode *might* apply, and can be enabled or
    disabled by {@link GeSHi->enable_strict_mode()} */
define('GESHI_MAYBE', 1);
/** Strict mode always applies */
define('GESHI_ALWAYS', 2);

// Advanced regexp handling constants, used in language files
/** The key of the regex array defining what to search for */
define('GESHI_SEARCH', 0);
/** The key of the regex array defining what bracket group in a
    matched search to use as a replacement */
define('GESHI_REPLACE', 1);
/** The key of the regex array defining any modifiers to the regular expression */
define('GESHI_MODIFIERS', 2);
/** The key of the regex array defining what bracket group in a
    matched search to put before the replacement */
define('GESHI_BEFORE', 3);
/** The key of the regex array defining what bracket group in a
    matched search to put after the replacement */
define('GESHI_AFTER', 4);
/** The key of the regex array defining a custom keyword to use
    for this regexp's html tag class */
define('GESHI_CLASS', 5);

/** Used in language files to mark comments */
define('GESHI_COMMENTS', 0);

/** some old PHP / PCRE versions only support up to xxx subpatterns in
    regular expressions. Set this to false if your PCRE lib is up to date
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
/** it's also important not to generate too long regular expressions
    be generous here... but keep in mind, that when reaching this limit we
    still have to close open patterns. 12k should do just fine on a 16k limit.
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_LENGTH', 12288);

//Number format specification
/** Basic number format for integers */
define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
/** Enhanced number format for integers like seen in C */
define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
/** Number format to highlight binary numbers with a suffix "b" */
define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
/** Number format to highlight binary numbers with a prefix % */
define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
/** Number format to highlight binary numbers with a prefix 0b (C) */
define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
/** Number format to highlight octal numbers with a leading zero */
define('GESHI_NUMBER_OCT_PREFIX', 256);           //0[0-7]+
/** Number format to highlight octal numbers with a prefix 0o (logtalk) */
define('GESHI_NUMBER_OCT_PREFIX_0O', 512);           //0o[0-7]+
/** Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series). */
define('GESHI_NUMBER_OCT_PREFIX_AT', 1024);           //@[0-7]+
/** Number format to highlight octal numbers with a suffix of o */
define('GESHI_NUMBER_OCT_SUFFIX', 2048);           //[0-7]+[oO]
/** Number format to highlight hex numbers with a prefix 0x */
define('GESHI_NUMBER_HEX_PREFIX', 4096);           //0x[0-9a-fA-F]+
/** Number format to highlight hex numbers with a prefix $ */
define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192);           //$[0-9a-fA-F]+
/** Number format to highlight hex numbers with a suffix of h */
define('GESHI_NUMBER_HEX_SUFFIX', 16384);           //[0-9][0-9a-fA-F]*h
/** Number format to highlight floating-point numbers without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI', 65536);          //\d+\.\d+
/** Number format to highlight floating-point numbers with a suffix "f", without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI_F', 131072);       //\d+(\.\d+)?f
/** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);      //\.\d+e\d+
/** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);       //\d+(\.\d+)?e\d+
//Custom formats are passed by RX array

// Error detection - use these to analyse faults
/** No sourcecode to highlight was specified
 * @deprecated
 */
define('GESHI_ERROR_NO_INPUT', 1);
/** The language specified does not exist */
define('GESHI_ERROR_NO_SUCH_LANG', 2);
/** GeSHi could not open a file for reading (generally a language file) */
define('GESHI_ERROR_FILE_NOT_READABLE', 3);
/** The header type passed to {@link GeSHi->set_header_type()} was invalid */
define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
/** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
/**#@-*/

/**
 * The GeSHi Class.
 *
 * Please refer to the documentation for GeSHi 1.0.X that is available
 * at http://qbnz.com/highlighter/documentation.php for more information
 * about how to use this class.
 *
 * @package   geshi
 * @author    Nigel McNie <nigel@geshi.org>
 * @author    Benny Baumann <BenBE@omorphia.de>
 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2014 Benny Baumann
 */
class GeSHi {

    /**
     * The source code to highlight
     * @var string
     */
    protected $source = '';

    /**
     * The language to use when highlighting
     * @var string
     */
    protected $language = '';

    /**
     * The data for the language used
     * @var array
     */
    protected $language_data = array();

    /**
     * The path to the language files
     * @var string
     */
    protected $language_path = GESHI_LANG_ROOT;

    /**
     * The error associated with the last operation: false when there is no
     * error, otherwise one of the GESHI_ERROR_* codes or a message string
     * @var int|string|false
     * @todo check err reporting works
     */
    protected $error = false;

    /**
     * Possible error messages, keyed by GESHI_ERROR_* code
     * @var array
     */
    protected $error_messages = array(
        GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
        GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
        GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
        GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
    );

    /**
     * Whether highlighting is strict or not
     * @var boolean
     */
    protected $strict_mode = false;

    /**
     * Whether to use CSS classes in output
     * @var boolean
     */
    protected $use_classes = false;

    /**
     * The type of header to use. Can be one of the following
     * values:
     *
     * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
     * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
     * - GESHI_HEADER_NONE: No header is outputted.
     * - GESHI_HEADER_PRE_VALID: A "pre" wraps the lines when line numbers
     *   are enabled, or the whole code otherwise.
     * - GESHI_HEADER_PRE_TABLE: Source is outputted in a "table" HTML element.
     *
     * @var int
     */
    protected $header_type = GESHI_HEADER_PRE;

    /**
     * Array of permissions for which lexics should be highlighted
     * @var array
     */
    protected $lexic_permissions = array(
        'KEYWORDS' =>    array(),
        'COMMENTS' =>    array('MULTI' => true),
        'REGEXPS' =>     array(),
        'ESCAPE_CHAR' => true,
        'BRACKETS' =>    true,
        'SYMBOLS' =>     false,
        'STRINGS' =>     true,
        'NUMBERS' =>     true,
        'METHODS' =>     true,
        'SCRIPT' =>      true
    );

    /**
     * The time it took to parse the code
     * @var double
     */
    protected $time = 0;

    /**
     * The content of the header block
     * @var string
     */
    protected $header_content = '';

    /**
     * The content of the footer block
     * @var string
     */
    protected $footer_content = '';

    /**
     * The style of the header block
     * @var string
     */
    protected $header_content_style = '';

    /**
     * The style of the footer block
     * @var string
     */
    protected $footer_content_style = '';

    /**
     * Tells if a block around the highlighted source should be forced
     * if not using line numbering
     * @var boolean
     */
    protected $force_code_block = false;

    /**
     * The styles for hyperlinks in the code
     * @var array
     */
    protected $link_styles = array();

    /**
     * Whether important blocks should be recognised or not
     * @var boolean
     * @deprecated
     * @todo REMOVE THIS FUNCTIONALITY!
     */
    protected $enable_important_blocks = false;

    /**
     * Styles for important parts of the code
     * @var string
     * @deprecated
     * @todo As above - rethink the whole idea of important blocks as it is buggy and
     * will be hard to implement in 1.2
     */
    protected $important_styles = 'font-weight: bold; color: red;';

    /**
     * Whether CSS IDs should be added to the code
     * @var boolean
     */
    protected $add_ids = false;

    /**
     * Lines that should be highlighted extra
     * @var array
     */
    protected $highlight_extra_lines = array();

    /**
     * Styles of lines that should be highlighted extra
     * @var array
     */
    protected $highlight_extra_lines_styles = array();

    /**
     * Styles of extra-highlighted lines
     * @var string
     */
    protected $highlight_extra_lines_style = 'background-color: #ffc;';

    /**
     * The line ending
     * If null, nl2br() will be used on the result string.
     * Otherwise, all instances of \n will be replaced with $line_ending
     * @var string|null
     */
    protected $line_ending = null;

    /**
     * Number at which line numbers should start at
     * @var int
     */
    protected $line_numbers_start = 1;

    /**
     * The overall style for this code block
     * @var string
     */
    protected $overall_style = 'font-family:monospace;';

    /**
     * The style for the actual code
     * @var string
     */
    protected $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';

    /**
     * The overall class for this code block
     * @var string
     */
    protected $overall_class = '';

    /**
     * The overall ID for this code block
     * @var string
     */
    protected $overall_id = '';

    /**
     * Line number styles
     * @var string
     */
    protected $line_style1 = 'font-weight: normal; vertical-align:top;';

    /**
     * Line number styles for fancy lines
     * @var string
     */
    protected $line_style2 = 'font-weight: bold; vertical-align:top;';

    /**
     * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
     * @var string
     */
    protected $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';

    /**
     * Flag for how line numbers are displayed; one of the
     * GESHI_*_LINE_NUMBERS constants
     * @var int
     */
    protected $line_numbers = GESHI_NO_LINE_NUMBERS;

    /**
     * Flag to decide if multi line spans are allowed. Set it to false to make sure
     * each tag is closed before and reopened after each linefeed.
     * @var boolean
     */
    protected $allow_multiline_span = true;

    /**
     * The "nth" value for fancy line highlighting
     * @var int
     */
    protected $line_nth_row = 0;

    /**
     * The size of tab stops
     * @var int
     */
    protected $tab_width = 8;

    /**
     * Should we use language-defined tab stop widths?
     * @var boolean
     */
    protected $use_language_tab_width = false;

    /**
     * Default target for keyword links
     * @var string
     */
    protected $link_target = '';

    /**
     * The encoding to use for entity encoding
     * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
     * @var string
     */
    protected $encoding = 'utf-8';

    /**
     * Should keywords be linked?
     * @var boolean
     */
    protected $keyword_links = true;

    /**
     * Currently loaded language file
     * @var string
     * @since 1.0.7.22
     */
    protected $loaded_language = '';

    /**
     * Whether the caches needed for parsing are built or not
     *
     * @var bool
     * @since 1.0.8
     */
    protected $parse_cache_built = false;

    /**
     * Work around for Suhosin Patch with disabled /e modifier
     *
     * Note from suhosins author in config file:
     * <blockquote>
     *   The /e modifier inside <code>preg_replace()</code> allows code execution.
     *   Often it is the cause for remote code execution exploits. It is wise to
     *   deactivate this feature and test where in the application it is used.
     *   The developer using the /e modifier should be made aware that he should
     *   use <code>preg_replace_callback()</code> instead
     * </blockquote>
     *
     * @var int
     * @since 1.0.8
     */
    protected $_kw_replace_group = 0;
    protected $_rx_key = 0;

    /**
     * some "callback parameters" for handle_multiline_regexps
     *
     * @since 1.0.8
     * @access private
     * @var string
     */
    protected $_hmr_before = '';
    protected $_hmr_replace = '';
    protected $_hmr_after = '';
    protected $_hmr_key = 0;

    /**
     * Creates a new GeSHi object, with source and language
     *
     * @param string $source   The source code to highlight
     * @param string $language The language to highlight the source with
     * @param string $path     The path to the language file directory.
<b>This 552 * is deprecated!</b> I've backported the auto path 553 * detection from the 1.1.X dev branch, so now it 554 * should be automatically set correctly. If you have 555 * renamed the language directory however, you will 556 * still need to set the path using this parameter or 557 * {@link GeSHi->set_language_path()} 558 * @since 1.0.0 559 */ 560 public function __construct($source = '', $language = '', $path = '') { 561 if ( is_string($source) && ($source !== '') ) { 562 $this->set_source($source); 563 } 564 if ( is_string($language) && ($language !== '') ) { 565 $this->set_language($language); 566 } 567 $this->set_language_path($path); 568 } 569 570 /** 571 * Returns the version of GeSHi 572 * 573 * @return string 574 * @since 1.0.8.11 575 */ 576 public function get_version() 577 { 578 return GESHI_VERSION; 579 } 580 581 /** 582 * Returns an error message associated with the last GeSHi operation, 583 * or false if no error has occurred 584 * 585 * @return string|false An error message if there has been an error, else false 586 * @since 1.0.0 587 */ 588 public function error() { 589 if ($this->error) { 590 //Put some template variables for debugging here ... 591 $debug_tpl_vars = array( 592 '{LANGUAGE}' => $this->language, 593 '{PATH}' => $this->language_path 594 ); 595 $msg = str_replace( 596 array_keys($debug_tpl_vars), 597 array_values($debug_tpl_vars), 598 $this->error_messages[$this->error]); 599 600 return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />"; 601 } 602 return false; 603 } 604 605 /** 606 * Gets a human-readable language name (thanks to Simon Patterson 607 * for the idea :)) 608 * 609 * @return string The name for the current language 610 * @since 1.0.2 611 */ 612 public function get_language_name() { 613 if (GESHI_ERROR_NO_SUCH_LANG == $this->error) { 614 return $this->language_data['LANG_NAME'] . 
' (Unknown Language)'; 615 } 616 return $this->language_data['LANG_NAME']; 617 } 618 619 /** 620 * Sets the source code for this object 621 * 622 * @param string $source The source code to highlight 623 * @since 1.0.0 624 */ 625 public function set_source($source) { 626 $this->source = $source; 627 $this->highlight_extra_lines = array(); 628 } 629 630 /** 631 * Clean up the language name to prevent malicious code injection 632 * 633 * @param string $language The name of the language to strip 634 * @since 1.0.9.1 635 */ 636 public function strip_language_name($language) { 637 $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language); 638 $language = strtolower($language); 639 640 return $language; 641 } 642 643 /** 644 * Sets the language for this object 645 * 646 * @note since 1.0.8 this function won't reset language-settings by default anymore! 647 * if you need this set $force_reset = true 648 * 649 * @param string $language The name of the language to use 650 * @param bool $force_reset 651 * @since 1.0.0 652 */ 653 public function set_language($language, $force_reset = false) { 654 $this->error = false; 655 $this->strict_mode = GESHI_NEVER; 656 657 if ($force_reset) { 658 $this->loaded_language = false; 659 } 660 661 //Clean up the language name to prevent malicious code injection 662 $language = $this->strip_language_name($language); 663 664 //Retreive the full filename 665 $file_name = $this->language_path . $language . '.php'; 666 if ($file_name == $this->loaded_language) { 667 // this language is already loaded! 668 return; 669 } 670 671 $this->language = $language; 672 673 //Check if we can read the desired file 674 if (!is_readable($file_name)) { 675 $this->error = GESHI_ERROR_NO_SUCH_LANG; 676 return; 677 } 678 679 // Load the language for parsing 680 $this->load_language($file_name); 681 } 682 683 /** 684 * Sets the path to the directory containing the language files. 
Note 685 * that this path is relative to the directory of the script that included 686 * geshi.php, NOT geshi.php itself. 687 * 688 * @param string $path The path to the language directory 689 * @since 1.0.0 690 * @deprecated The path to the language files should now be automatically 691 * detected, so this method should no longer be needed. The 692 * 1.1.X branch handles manual setting of the path differently 693 * so this method will disappear in 1.2.0. 694 */ 695 public function set_language_path($path) { 696 if(strpos($path,':')) { 697 //Security Fix to prevent external directories using fopen wrappers. 698 if(DIRECTORY_SEPARATOR == "\\") { 699 if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) { 700 return; 701 } 702 } else { 703 return; 704 } 705 } 706 if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) { 707 //Security Fix to prevent external directories using fopen wrappers. 708 return; 709 } 710 if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) { 711 //Security Fix to prevent external directories using fopen wrappers. 712 return; 713 } 714 if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) { 715 //Security Fix to prevent external directories using fopen wrappers. 716 return; 717 } 718 if ($path) { 719 $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/'; 720 $this->set_language($this->language); // otherwise set_language_path has no effect 721 } 722 } 723 724 /** 725 * Get supported langs or an associative array lang=>full_name. 
726 * @param boolean $full_names 727 * @return array 728 */ 729 public function get_supported_languages($full_names=false) 730 { 731 // return array 732 $back = array(); 733 734 // we walk the lang root 735 $dir = dir($this->language_path); 736 737 // foreach entry 738 while (false !== ($entry = $dir->read())) 739 { 740 $full_path = $this->language_path.$entry; 741 742 // Skip all dirs 743 if (is_dir($full_path)) { 744 continue; 745 } 746 747 // we only want lang.php files 748 if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) { 749 continue; 750 } 751 752 // Raw lang name is here 753 $langname = $matches[1]; 754 755 // We want the fullname too? 756 if ($full_names === true) 757 { 758 if (false !== ($fullname = $this->get_language_fullname($langname))) 759 { 760 $back[$langname] = $fullname; // we go associative 761 } 762 } 763 else 764 { 765 // just store raw langname 766 $back[] = $langname; 767 } 768 } 769 770 $dir->close(); 771 772 return $back; 773 } 774 775 /** 776 * Get full_name for a lang or false. 
777 * @param string $language short langname (html4strict for example) 778 * @return mixed 779 */ 780 public function get_language_fullname($language) 781 { 782 //Clean up the language name to prevent malicious code injection 783 $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language); 784 785 $language = strtolower($language); 786 787 // get fullpath-filename for a langname 788 $fullpath = $this->language_path.$language.'.php'; 789 790 // we need to get contents :S 791 if (false === ($data = file_get_contents($fullpath))) { 792 $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language); 793 return false; 794 } 795 796 // match the langname 797 if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\']|\\\')+?)\'/', $data, $matches)) { 798 $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language); 799 return false; 800 } 801 802 // return fullname for langname 803 return stripcslashes($matches[1]); 804 } 805 806 /** 807 * Sets the type of header to be used. 808 * 809 * If GESHI_HEADER_DIV is used, the code is surrounded in a "div".This 810 * means more source code but more control over tab width and line-wrapping. 811 * GESHI_HEADER_PRE means that a "pre" is used - less source, but less 812 * control. Default is GESHI_HEADER_PRE. 813 * 814 * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code 815 * should be outputted. 816 * 817 * @param int $type The type of header to be used 818 * @since 1.0.0 819 */ 820 public function set_header_type($type) { 821 //Check if we got a valid header type 822 if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV, 823 GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) { 824 $this->error = GESHI_ERROR_INVALID_HEADER_TYPE; 825 return; 826 } 827 828 //Set that new header type 829 $this->header_type = $type; 830 } 831 832 /** 833 * Sets the styles for the code that will be outputted 834 * when this object is parsed. 
The style should be a 835 * string of valid stylesheet declarations 836 * 837 * @param string $style The overall style for the outputted code block 838 * @param boolean $preserve_defaults Whether to merge the styles with the current styles or not 839 * @since 1.0.0 840 */ 841 public function set_overall_style($style, $preserve_defaults = false) { 842 if (!$preserve_defaults) { 843 $this->overall_style = $style; 844 } else { 845 $this->overall_style .= $style; 846 } 847 } 848 849 /** 850 * Sets the overall classname for this block of code. This 851 * class can then be used in a stylesheet to style this object's 852 * output 853 * 854 * @param string $class The class name to use for this block of code 855 * @since 1.0.0 856 */ 857 public function set_overall_class($class) { 858 $this->overall_class = $class; 859 } 860 861 /** 862 * Sets the overall id for this block of code. This id can then 863 * be used in a stylesheet to style this object's output 864 * 865 * @param string $id The ID to use for this block of code 866 * @since 1.0.0 867 */ 868 public function set_overall_id($id) { 869 $this->overall_id = $id; 870 } 871 872 /** 873 * Sets whether CSS classes should be used to highlight the source. Default 874 * is off, calling this method with no arguments will turn it on 875 * 876 * @param boolean $flag Whether to turn classes on or not 877 * @since 1.0.0 878 */ 879 public function enable_classes($flag = true) { 880 $this->use_classes = ($flag) ? true : false; 881 } 882 883 /** 884 * Sets the style for the actual code. This should be a string 885 * containing valid stylesheet declarations. If $preserve_defaults is 886 * true, then styles are merged with the default styles, with the 887 * user defined styles having priority 888 * 889 * Note: Use this method to override any style changes you made to 890 * the line numbers if you are using line numbers, else the line of 891 * code will have the same style as the line number! 
Consult the 892 * GeSHi documentation for more information about this. 893 * 894 * @param string $style The style to use for actual code 895 * @param boolean $preserve_defaults Whether to merge the current styles with the new styles 896 * @since 1.0.2 897 */ 898 public function set_code_style($style, $preserve_defaults = false) { 899 if (!$preserve_defaults) { 900 $this->code_style = $style; 901 } else { 902 $this->code_style .= $style; 903 } 904 } 905 906 /** 907 * Sets the styles for the line numbers. 908 * 909 * @param string $style1 The style for the line numbers that are "normal" 910 * @param string|boolean $style2 If a string, this is the style of the line 911 * numbers that are "fancy", otherwise if boolean then this 912 * defines whether the normal styles should be merged with the 913 * new normal styles or not 914 * @param boolean $preserve_defaults If set, is the flag for whether to merge the "fancy" 915 * styles with the current styles or not 916 * @since 1.0.2 917 */ 918 public function set_line_style($style1, $style2 = '', $preserve_defaults = false) { 919 //Check if we got 2 or three parameters 920 if (is_bool($style2)) { 921 $preserve_defaults = $style2; 922 $style2 = ''; 923 } 924 925 //Actually set the new styles 926 if (!$preserve_defaults) { 927 $this->line_style1 = $style1; 928 $this->line_style2 = $style2; 929 } else { 930 $this->line_style1 .= $style1; 931 $this->line_style2 .= $style2; 932 } 933 } 934 935 /** 936 * Sets whether line numbers should be displayed. 937 * 938 * Valid values for the first parameter are: 939 * 940 * - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed 941 * - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed 942 * - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed 943 * 944 * For fancy line numbers, the second parameter is used to signal which lines 945 * are to be fancy. For example, if the value of this parameter is 5 then every 946 * 5th line will be fancy. 
947 * 948 * @param int $flag How line numbers should be displayed 949 * @param int $nth_row Defines which lines are fancy 950 * @since 1.0.0 951 */ 952 public function enable_line_numbers($flag, $nth_row = 5) { 953 if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag 954 && GESHI_FANCY_LINE_NUMBERS != $flag) { 955 $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE; 956 } 957 $this->line_numbers = $flag; 958 $this->line_nth_row = $nth_row; 959 } 960 961 /** 962 * Sets wether spans and other HTML markup generated by GeSHi can 963 * span over multiple lines or not. Defaults to true to reduce overhead. 964 * Set it to false if you want to manipulate the output or manually display 965 * the code in an ordered list. 966 * 967 * @param boolean $flag Wether multiline spans are allowed or not 968 * @since 1.0.7.22 969 */ 970 public function enable_multiline_span($flag) { 971 $this->allow_multiline_span = (bool) $flag; 972 } 973 974 /** 975 * Get current setting for multiline spans, see GeSHi->enable_multiline_span(). 976 * 977 * @see enable_multiline_span 978 * @return bool 979 */ 980 public function get_multiline_span() { 981 return $this->allow_multiline_span; 982 } 983 984 /** 985 * Sets the style for a keyword group. 
If $preserve_defaults is 986 * true, then styles are merged with the default styles, with the 987 * user defined styles having priority 988 * 989 * @param int $key The key of the keyword group to change the styles of 990 * @param string $style The style to make the keywords 991 * @param boolean $preserve_defaults Whether to merge the new styles with the old or just 992 * to overwrite them 993 * @since 1.0.0 994 */ 995 public function set_keyword_group_style($key, $style, $preserve_defaults = false) { 996 //Set the style for this keyword group 997 if('*' == $key) { 998 foreach($this->language_data['STYLES']['KEYWORDS'] as $_key => $_value) { 999 if (!$preserve_defaults) { 1000 $this->language_data['STYLES']['KEYWORDS'][$_key] = $style; 1001 } else { 1002 $this->language_data['STYLES']['KEYWORDS'][$_key] .= $style; 1003 } 1004 } 1005 } else { 1006 if (!$preserve_defaults) { 1007 $this->language_data['STYLES']['KEYWORDS'][$key] = $style; 1008 } else { 1009 $this->language_data['STYLES']['KEYWORDS'][$key] .= $style; 1010 } 1011 } 1012 1013 //Update the lexic permissions 1014 if (!isset($this->lexic_permissions['KEYWORDS'][$key])) { 1015 $this->lexic_permissions['KEYWORDS'][$key] = true; 1016 } 1017 } 1018 1019 /** 1020 * Turns highlighting on/off for a keyword group 1021 * 1022 * @param int $key The key of the keyword group to turn on or off 1023 * @param boolean $flag Whether to turn highlighting for that group on or off 1024 * @since 1.0.0 1025 */ 1026 public function set_keyword_group_highlighting($key, $flag = true) { 1027 $this->lexic_permissions['KEYWORDS'][$key] = ($flag) ? true : false; 1028 } 1029 1030 /** 1031 * Sets the styles for comment groups. 
If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * @param int|string $key The key of the comment group to change the styles of,
     *                        or the string '*' to address every comment group that
     *                        currently has a style entry
     * @param string $style The style to make the comments
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                    to overwrite them
     * @since 1.0.0
     */
    public function set_comments_style($key, $style, $preserve_defaults = false) {
        // Strict wildcard test: with "==" PHP versions before 8 consider the
        // integer key 0 equal to '*', which would restyle every group.
        if ('*' === $key) {
            $groups = array();
            if (isset($this->language_data['STYLES']['COMMENTS'])
                && is_array($this->language_data['STYLES']['COMMENTS'])) {
                $groups = array_keys($this->language_data['STYLES']['COMMENTS']);
            }
        } else {
            $groups = array($key);
        }

        foreach ($groups as $group) {
            // Append when merging with an existing style, otherwise assign;
            // assigning also covers groups that have no style entry yet.
            if ($preserve_defaults && isset($this->language_data['STYLES']['COMMENTS'][$group])) {
                $this->language_data['STYLES']['COMMENTS'][$group] .= $style;
            } else {
                $this->language_data['STYLES']['COMMENTS'][$group] = $style;
            }
        }
    }

    /**
     * Turns highlighting on/off for comment groups
     *
     * @param int $key The key of the comment group to turn on or off
     * @param boolean $flag Whether to turn highlighting for that group on or off
     * @since 1.0.0
     */
    public function set_comments_highlighting($key, $flag = true) {
        $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
    }

    /**
     * Sets the styles for escaped characters. If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * @param string $style The style to make the escape characters
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                    to overwrite them
     * @param int $group Tells the group of symbols for which style should be set.
* @since 1.0.0
     */
    public function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
        if ($preserve_defaults) {
            // Merge: append the new declarations to the existing ones
            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
            return;
        }

        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
    }

    /**
     * Switches highlighting of escaped characters on or off.
     *
     * @param boolean $flag Whether to turn highlighting for escape characters on or off
     * @since 1.0.0
     */
    public function set_escape_characters_highlighting($flag = true) {
        $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
    }

    /**
     * Sets the style used for brackets (style group 0 of BRACKETS).
     * With $preserve_defaults the new style is appended to the existing
     * one, otherwise it replaces it.
     *
     * This method is DEPRECATED: use set_symbols_style instead.
     * This method will be removed in 1.2.X
     *
     * @param string $style The style to make the brackets
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                    to overwrite them
     * @since 1.0.0
     * @deprecated In favour of set_symbols_style
     */
    public function set_brackets_style($style, $preserve_defaults = false) {
        if ($preserve_defaults) {
            $this->language_data['STYLES']['BRACKETS'][0] .= $style;
            return;
        }

        $this->language_data['STYLES']['BRACKETS'][0] = $style;
    }

    /**
     * Switches highlighting of brackets on or off.
     *
     * This method is DEPRECATED: use set_symbols_highlighting instead.
     * This method will be removed in 1.2.X
     *
     * @param boolean $flag Whether to turn highlighting for brackets on or off
     * @since 1.0.0
     * @deprecated In favour of set_symbols_highlighting
     */
    public function set_brackets_highlighting($flag) {
        $this->lexic_permissions['BRACKETS'] = (bool) $flag;
    }

    /**
     * Sets the style for one group of symbols. With $preserve_defaults the
     * new style is appended to the existing one, otherwise it replaces it.
     * Changes to group 0 are mirrored onto the bracket style.
     *
     * @param string $style The style to make the symbols
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                    to overwrite them
     * @param int $group Tells the group of symbols for which style should be set.
     * @since 1.0.1
     */
    public function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
        // Update the style of the requested symbol group
        if ($preserve_defaults) {
            $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
        } else {
            $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
        }

        // For backward compatibility the bracket style follows symbol group 0
        if (0 == $group) {
            $this->set_brackets_style($style, $preserve_defaults);
        }
    }

    /**
     * Switches highlighting of symbols on or off; the bracket setting is
     * kept in step.
     *
     * @param boolean $flag Whether to turn highlighting for symbols on or off
     * @since 1.0.0
     */
    public function set_symbols_highlighting($flag) {
        // Update lexic permissions for symbols
        $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

        // For backward compatibility
        $this->set_brackets_highlighting($flag);
    }

    /**
     * Sets the styles for strings. If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * @param string $style The style to make the strings
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                    to overwrite them
     * @param int $group Tells the group of strings for which style should be set.
* @since 1.0.0
     */
    public function set_strings_style($style, $preserve_defaults = false, $group = 0) {
        if ($preserve_defaults) {
            // Merge: append the new declarations to the existing ones
            $this->language_data['STYLES']['STRINGS'][$group] .= $style;
            return;
        }

        $this->language_data['STYLES']['STRINGS'][$group] = $style;
    }

    /**
     * Switches highlighting of strings on or off.
     *
     * @param boolean $flag Whether to turn highlighting for strings on or off
     * @since 1.0.0
     */
    public function set_strings_highlighting($flag) {
        $this->lexic_permissions['STRINGS'] = (bool) $flag;
    }

    /**
     * Sets the style for one group of strict-mode script blocks. With
     * $preserve_defaults the new style is appended to the existing one,
     * otherwise it replaces it.
     *
     * @param string $style The style to make the script blocks
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                    to overwrite them
     * @param int $group Tells the group of script blocks for which style should be set.
     * @since 1.0.8.4
     */
    public function set_script_style($style, $preserve_defaults = false, $group = 0) {
        // Update the style of the script block group
        if ($preserve_defaults) {
            $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
            return;
        }

        $this->language_data['STYLES']['SCRIPT'][$group] = $style;
    }

    /**
     * Sets the styles for numbers. If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * @param string $style The style to make the numbers
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                    to overwrite them
     * @param int $group Tells the group of numbers for which style should be set.
* @since 1.0.0
     */
    public function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
        if ($preserve_defaults) {
            // Merge: append the new declarations to the existing ones
            $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
            return;
        }

        $this->language_data['STYLES']['NUMBERS'][$group] = $style;
    }

    /**
     * Switches highlighting of numbers on or off.
     *
     * @param boolean $flag Whether to turn highlighting for numbers on or off
     * @since 1.0.0
     */
    public function set_numbers_highlighting($flag) {
        $this->lexic_permissions['NUMBERS'] = (bool) $flag;
    }

    /**
     * Sets the style for methods. $key identifies the "object splitter" the
     * style belongs to; the number can be found in the language file of the
     * language being highlighted. With $preserve_defaults the new style is
     * appended to the existing one, otherwise it replaces it.
     *
     * @param int $key The key of the object splitter to change the styles of
     * @param string $style The style to make the methods
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                    to overwrite them
     * @since 1.0.0
     */
    public function set_methods_style($key, $style, $preserve_defaults = false) {
        if ($preserve_defaults) {
            $this->language_data['STYLES']['METHODS'][$key] .= $style;
            return;
        }

        $this->language_data['STYLES']['METHODS'][$key] = $style;
    }

    /**
     * Switches highlighting of methods on or off.
     *
     * @param boolean $flag Whether to turn highlighting for methods on or off
     * @since 1.0.0
     */
    public function set_methods_highlighting($flag) {
        $this->lexic_permissions['METHODS'] = (bool) $flag;
    }

    /**
     * Sets the styles for regexps.
If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * @param int $key The key of the regular expression group to change the styles of
     * @param string $style The style to make the regular expression matches
     * @param bool $preserve_defaults Whether to merge the new styles with the old or just
     *                                to overwrite them
     * @since 1.0.0
     */
    public function set_regexps_style($key, $style, $preserve_defaults = false) {
        if ($preserve_defaults) {
            // Merge: append the new declarations to the existing ones
            $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
            return;
        }

        $this->language_data['STYLES']['REGEXPS'][$key] = $style;
    }

    /**
     * Switches highlighting of one regular expression group on or off.
     *
     * @param int $key The key of the regular expression group to turn on or off
     * @param boolean $flag Whether to turn highlighting for the regular expression group on or off
     * @since 1.0.0
     */
    public function set_regexps_highlighting($key, $flag) {
        $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
    }

    /**
     * Chooses whether the keywords of a group are matched case sensitively.
     *
     * @param int $key The key of the keyword group to change the case sensitivity of
     * @param boolean $case Whether to check in a case sensitive manner or not
     * @since 1.0.0
     */
    public function set_case_sensitivity($key, $case) {
        $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
    }

    /**
     * Sets the case that keywords should use when found.
Use the constants:
     *
     *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
     *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
     *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
     *
     * Any other value is ignored and the current setting is kept.
     *
     * @param int $case A constant specifying what to do with matched keywords
     * @since 1.0.1
     */
    public function set_case_keywords($case) {
        $allowed_modes = array(GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER);

        if (!in_array($case, $allowed_modes)) {
            return;
        }

        $this->language_data['CASE_KEYWORDS'] = $case;
    }

    /**
     * Sets how many spaces a tab is substituted for
     *
     * The value is converted with intval(); a result below 1 selects the
     * default width of 8.
     *
     * @param int $width The tab width
     * @since 1.0.0
     */
    public function set_tab_width($width) {
        $requested_width = intval($width);

        // Anything that does not fit the constraints falls back to the default
        $this->tab_width = ($requested_width < 1) ? 8 : $requested_width;
    }

    /**
     * Chooses whether the tab-stop width given by the language file is
     * preferred over the width set with set_tab_width().
     *
     * @param boolean $use Whether to use language-specific tab-stop widths
     * @since 1.0.7.20
     */
    public function set_use_language_tab_width($use) {
        $this->use_language_tab_width = ($use) ? true : false;
    }

    /**
     * Returns the tab width that is effectively in use: the language's
     * TAB_WIDTH when language widths are enabled and the language defines
     * one, the user-set width otherwise.
     *
     * @return int Tab width
     * @since 1.0.7.20
     */
    public function get_real_tab_width() {
        $language_width_applies = $this->use_language_tab_width
            && isset($this->language_data['TAB_WIDTH']);

        if ($language_width_applies) {
            return $this->language_data['TAB_WIDTH'];
        }

        return $this->tab_width;
    }

    /**
     * Enables/disables strict highlighting. Default is off, calling this
     * method without parameters will turn it on. See documentation
     * for more details on strict mode and where to use it.
*
     * @param boolean $mode Whether to enable strict mode or not
     * @since 1.0.0
     */
    public function enable_strict_mode($mode = true) {
        // Only languages that declare strict mode as optional can be switched
        if (GESHI_MAYBE != $this->language_data['STRICT_MODE_APPLIES']) {
            return;
        }

        if ($mode) {
            $this->strict_mode = GESHI_ALWAYS;
        } else {
            $this->strict_mode = GESHI_NEVER;
        }
    }

    /**
     * Switches every kind of highlighting off.
     *
     * @since 1.0.0
     * @todo  Rewrite with array traversal
     * @deprecated In favour of enable_highlighting
     */
    public function disable_highlighting() {
        $this->enable_highlighting(false);
    }

    /**
     * Switches every kind of highlighting on, or off when $flag is false.
     *
     * The optional flag parameter was added in version 1.0.7.21. Every entry
     * of the lexic permissions (including the per-group entries of nested
     * permission arrays) and the important-blocks switch receive the same value.
     *
     * @since 1.0.0
     * @param boolean $flag A flag specifying whether to enable or disable all highlighting
     * @todo  Rewrite with array traversal
     */
    public function enable_highlighting($flag = true) {
        $enabled = (bool) $flag;

        foreach ($this->lexic_permissions as $type => $permission) {
            if (!is_array($permission)) {
                $this->lexic_permissions[$type] = $enabled;
                continue;
            }

            // Per-group permissions (e.g. one entry per keyword group)
            foreach (array_keys($permission) as $group) {
                $this->lexic_permissions[$type][$group] = $enabled;
            }
        }

        // Context blocks
        $this->enable_important_blocks = $enabled;
    }

    /**
     * Given a file extension, this method returns either a valid geshi language
     * name, or 'text' if it couldn't be found
     *
     * @param string $extension The extension to get a language name for
     * @param array  $lookup    A lookup array to use instead of the default one
     * @since 1.0.5
     * @todo Re-think about how this method works (maybe make it private and/or make it
     *       a extension->lang lookup?)
* @return int|string The key of the first lookup entry listing the extension
     *                    (compared case-insensitively), or 'text' when none does.
     *                    An int can only occur with a custom $lookup using integer keys.
     */
    public static function get_language_name_from_extension( $extension, $lookup = array() ) {
        $extension = strtolower((string) $extension);

        if ( !is_array($lookup) || empty($lookup)) {
            // Default table. Order matters: the first language listing an
            // extension wins (e.g. 'a' resolves to '6502acme', 'pas' to 'delphi').
            $lookup = array(
                '6502acme' => array( 'a', 's', 'asm', 'inc' ),
                '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
                '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
                '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
                'abap' => array('abap'),
                'actionscript' => array('as'),
                'ada' => array('a', 'ada', 'adb', 'ads'),
                'apache' => array('conf'),
                'asm' => array('ash', 'asm', 'inc'),
                'asp' => array('asp'),
                'bash' => array('sh'),
                'bf' => array('bf'),
                'c' => array('c', 'h'),
                'c_mac' => array('c', 'h'),
                'caddcl' => array(),
                'cadlisp' => array(),
                'cdfg' => array('cdfg'),
                'cobol' => array('cbl'),
                'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
                'csharp' => array('cs'),
                'css' => array('css'),
                'd' => array('d'),
                'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
                'diff' => array('diff', 'patch'),
                'dos' => array('bat', 'cmd'),
                'gdb' => array('kcrash', 'crash', 'bt'),
                'gettext' => array('po', 'pot'),
                'gml' => array('gml'),
                'gnuplot' => array('plt'),
                'groovy' => array('groovy'),
                'haskell' => array('hs'),
                'haxe' => array('hx'),
                'html4strict' => array('html', 'htm'),
                'ini' => array('ini', 'desktop', 'vbp'),
                'java' => array('java'),
                'javascript' => array('js'),
                'klonec' => array('kl1'),
                'klonecpp' => array('klx'),
                'latex' => array('tex'),
                'lisp' => array('lisp'),
                'lua' => array('lua'),
                'matlab' => array('m'),
                'mpasm' => array(),
                'mysql' => array('sql'),
                'nsis' => array(),
                'objc' => array(),
                'oobas' => array(),
                'oracle8' => array(),
                'oracle10' => array(),
                'pascal' => array('pas'),
                'perl' => array('pl', 'pm'),
                'php' => array('php', 'php5', 'phtml', 'phps'),
                'povray' => array('pov'),
                'providex' => array('pvc', 'pvx'),
                'prolog' => array('pl'),
                'python' => array('py'),
                'qbasic' => array('bi'),
                'reg' => array('reg'),
                'ruby' => array('rb'),
                'sas' => array('sas'),
                'scala' => array('scala'),
                'scheme' => array('scm'),
                'scilab' => array('sci'),
                'smalltalk' => array('st'),
                'smarty' => array(),
                'tcl' => array('tcl'),
                'text' => array('txt'),
                'vb' => array('bas', 'ctl', 'frm'),
                'vbnet' => array('vb', 'sln'),
                'visualfoxpro' => array(),
                'whitespace' => array('ws'),
                'xml' => array('xml', 'svg', 'xrc', 'vbproj', 'csproj', 'userprefs', 'resx', 'stetic', 'settings', 'manifest', 'myapp'),
                'z80' => array('z80', 'asm', 'inc')
            );
        }

        foreach ($lookup as $lang => $extensions) {
            // (array) lets a custom table give a single extension as a plain string
            foreach ((array) $extensions as $candidate) {
                // The input was lower-cased above, so the table entry has to be
                // lower-cased as well or upper-case entries could never match.
                // "===" on strings avoids numeric juggling ('1e1' == '10').
                if (strtolower((string) $candidate) === $extension) {
                    return $lang;
                }
            }
        }

        return 'text';
    }

    /**
     * Given a file name, this method loads its contents in, and attempts
     * to set the language automatically. An optional lookup table can be
     * passed for looking up the language name. If not specified a default
     * table is used
     *
     * The language table is in the form
     * <pre>array(
     *   'lang_name' => array('extension', 'extension', ...),
     *   'lang_name' ...
* );</pre>
     *
     * If the file cannot be read, the error code GESHI_ERROR_FILE_NOT_READABLE
     * is stored and neither source nor language are changed.
     *
     * @param string $file_name The filename to load the source from
     * @param array  $lookup    A lookup array to use instead of the default one
     * @todo Complete rethink of this and above method
     * @since 1.0.5
     */
    public function load_from_file($file_name, $lookup = array()) {
        // is_readable() is also true for directories, which cannot be loaded
        $contents = false;
        if (is_readable($file_name) && !is_dir($file_name)) {
            $contents = file_get_contents($file_name);
        }

        if (false === $contents) {
            $this->error = GESHI_ERROR_FILE_NOT_READABLE;
            return;
        }

        $this->set_source($contents);

        // pathinfo() only inspects the last path component, so a dot in a
        // directory name is not mistaken for an extension, and a name without
        // any dot yields the empty string.
        $extension = pathinfo($file_name, PATHINFO_EXTENSION);
        $this->set_language(self::get_language_name_from_extension($extension, $lookup));
    }

    /**
     * Adds a keyword to a keyword group for highlighting
     *
     * A group that does not exist yet is created. Words already present in
     * the group are not added a second time.
     *
     * @param int    $key  The key of the keyword group to add the keyword to
     * @param string $word The word to add to the keyword group
     * @since 1.0.0
     */
    public function add_keyword($key, $word) {
        // isset() first: the group may not exist at all yet
        if (!isset($this->language_data['KEYWORDS'][$key])
            || !is_array($this->language_data['KEYWORDS'][$key])) {
            $this->language_data['KEYWORDS'][$key] = array();
        }

        if (in_array($word, $this->language_data['KEYWORDS'][$key])) {
            return;
        }
        $this->language_data['KEYWORDS'][$key][] = $word;

        // The cached regexps only need maintaining once they have been built
        if (!$this->parse_cache_built) {
            return;
        }

        if (!empty($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
            //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
            $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
            $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
        } elseif (!isset($this->lexic_permissions['KEYWORDS'][$key])
            || $this->lexic_permissions['KEYWORDS'][$key]) {
            // No cached list to append to (e.g. a group created just now):
            // compile one, unless the group is explicitly switched off.
            $this->optimize_keyword_group($key);
        }
    }

    /**
     * Removes a keyword from a keyword group
     *
     * @param int    $key The key of the keyword group to remove the keyword from
     * @param string $word The word to remove from the keyword group
     * @param bool   $recompile Whether to automatically recompile the optimized regexp list or not.
* Note: if you set this to false and @see GeSHi->parse_code() was already called once,
     * for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
     * or the removed keyword will stay in cache and still be highlighted! On the other hand
     * it might be too expensive to recompile the regexp list for every removal if you want to
     * remove a lot of keywords.
     * @since 1.0.0
     */
    public function remove_keyword($key, $word, $recompile = true) {
        // Nothing to remove from a group that does not exist. Without this
        // guard array_search() would be handed null instead of an array.
        if (!isset($this->language_data['KEYWORDS'][$key])
            || !is_array($this->language_data['KEYWORDS'][$key])) {
            return;
        }

        $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
        if (false === $key_to_remove) {
            return;
        }

        unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);

        //NEW in 1.0.8, optionally recompile keyword group
        if ($recompile && $this->parse_cache_built) {
            $this->optimize_keyword_group($key);
        }
    }

    /**
     * Creates a new keyword group
     *
     * The group is registered with its words, style and case sensitivity and
     * highlighting for it is switched on. Nothing is registered when the word
     * list is empty.
     *
     * @param int     $key            The key of the keyword group to create
     * @param string  $styles         The styles for the keyword group
     * @param boolean $case_sensitive Whether the keyword group is case sensitive or not
     * @param array   $words          The words to use for the keyword group
     * @since 1.0.0
     * @return bool false when $words is empty, true otherwise
     */
    public function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
        $words = (array) $words;
        if (empty($words)) {
            // empty word lists mess up highlighting
            return false;
        }

        //Add the new keyword group internally
        $this->language_data['KEYWORDS'][$key] = $words;
        $this->lexic_permissions['KEYWORDS'][$key] = true;
        $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
        $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;

        //NEW in 1.0.8, cache keyword regexp
        if ($this->parse_cache_built) {
            $this->optimize_keyword_group($key);
        }
        return true;
    }

    /**
     * Removes a keyword group
     *
     * @param int
$key The key of the keyword group to remove
     * @since 1.0.0
     */
    public function remove_keyword_group ($key) {
        //Remove the keyword group internally, including its cached regexps (NEW in 1.0.8)
        unset(
            $this->language_data['KEYWORDS'][$key],
            $this->lexic_permissions['KEYWORDS'][$key],
            $this->language_data['CASE_SENSITIVE'][$key],
            $this->language_data['STYLES']['KEYWORDS'][$key],
            $this->language_data['CACHED_KEYWORD_LISTS'][$key]
        );
    }

    /**
     * Compiles the optimized regexp list for one keyword group and stores it
     * in the keyword cache.
     *
     * When the language's parser control enables SPACE_AS_WHITESPACE (for all
     * keywords, or for this group, the group setting taking precedence),
     * every space in the compiled expressions is replaced by "\s+".
     *
     * @param int $key The key of the keyword group to compile & optimize
     * @since 1.0.8
     */
    public function optimize_keyword_group($key) {
        $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
            $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);

        // isset() on the full path is false as soon as any level is missing
        $space_as_whitespace = false;
        if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
            $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
        }
        if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
            // The per-group setting overrides the general one
            $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
        }

        if (!$space_as_whitespace) {
            return;
        }

        foreach ($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $index => $regexp) {
            $this->language_data['CACHED_KEYWORD_LISTS'][$key][$index] =
                str_replace(" ", "\\s+", $regexp);
        }
    }

    /**
     * Stores the content shown in the header block.
     *
     * @param string $content The content of the header block
     * @since 1.0.2
     */
    public function set_header_content($content) {
        $this->header_content = $content;
    }

    /**
     * Stores the content shown in the footer block.
     *
     * @param string $content The content of the footer block
     * @since 1.0.2
     */
    public function set_footer_content($content) {
        $this->footer_content = $content;
    }

    /**
     * Stores the style applied to the header content.
     *
     * @param string $style The style for the header content
     * @since 1.0.2
     */
    public function set_header_content_style($style) {
        $this->header_content_style = $style;
    }

    /**
     * Stores the style applied to the footer content.
     *
     * @param string $style The style for the footer content
     * @since 1.0.2
     */
    public function set_footer_content_style($style) {
        $this->footer_content_style = $style;
    }

    /**
     * Chooses whether a surrounding block is forced around the
     * highlighted code.
     *
     * @param boolean $flag Tells whether to enable or disable this feature
     * @since 1.0.7.20
     */
    public function enable_inner_code_block($flag) {
        $this->force_code_block = ($flag) ? true : false;
    }

    /**
     * Sets the base URL used when linking the keywords of one group.
     *
     * @param int $group The key of the keyword group to set the URL for
     * @param string $url The URL to set for the group. If {FNAME} is in
     *                    the url somewhere, it is replaced by the keyword
     *                    that the URL is being made for
     * @since 1.0.2
     */
    public function set_url_for_keyword_group($group, $url) {
        $this->language_data['URLS'][$group] = $url;
    }

    /**
     * Sets styles for links in code
     *
     * @param int $type A constant that specifies what state the style is being
     *                  set for - e.g.
:hover or :visited
     * @param string $styles The styles to use for that state
     * @since 1.0.2
     */
    public function set_link_styles($type, $styles) {
        $this->link_styles[$type] = $styles;
    }

    /**
     * Sets the target for links in code
     *
     * The value is stored as a ready-made ` target="..."` attribute string;
     * an empty/falsy target stores the empty string. The target is inserted
     * as given, without HTML escaping.
     *
     * @param string $target The target for links in the code, e.g. _blank
     * @since 1.0.3
     */
    public function set_link_target($target) {
        $this->link_target = ($target) ? ' target="' . $target . '"' : '';
    }

    /**
     * Stores the styles used for important parts of the code.
     *
     * @param string $styles The styles to use on important parts of the code
     * @since 1.0.2
     */
    public function set_important_styles($styles) {
        $this->important_styles = $styles;
    }

    /**
     * Chooses whether context-important blocks are highlighted.
     *
     * @param boolean $flag Tells whether to enable or disable highlighting of important blocks
     * @todo REMOVE THIS SHIZ FROM GESHI!
     * @deprecated
     * @since 1.0.2
     */
    public function enable_important_blocks($flag) {
        $this->enable_important_blocks = (bool) $flag;
    }

    /**
     * Chooses whether CSS IDs are added to each line.
     *
     * @param boolean $flag If true, IDs will be added to each line.
     * @since 1.0.2
     */
    public function enable_ids($flag = true) {
        $this->add_ids = (bool) $flag;
    }

    /**
     * Specifies which lines to highlight extra
     *
     * The extra style parameter was added in 1.0.7.21.
     *
     * @param mixed $lines An array of line numbers to highlight, or just a line
     *                     number on its own.
     * @param string $style A string specifying the style to use for this line.
     *                      If null is specified, the default style is used.
*                      If false is specified, the line will be removed from
     *                      special highlighting
     * @since 1.0.2
     * @todo  Some data replication here that could be cut down on
     */
    public function highlight_lines_extra($lines, $style = null) {
        if (is_array($lines)) {
            //Split up the job using single lines at a time
            foreach ($lines as $single_line) {
                $this->highlight_lines_extra($single_line, $style);
            }
            return;
        }

        $line_number = intval($lines);

        if ($style === false) {
            //Remove this line from special highlighting altogether
            unset(
                $this->highlight_extra_lines[$line_number],
                $this->highlight_extra_lines_styles[$line_number]
            );
            return;
        }

        //Mark the line as being highlighted specially
        $this->highlight_extra_lines[$line_number] = $line_number;

        if ($style === null) {
            //No individual style: the default style applies
            unset($this->highlight_extra_lines_styles[$line_number]);
        } else {
            $this->highlight_extra_lines_styles[$line_number] = $style;
        }
    }

    /**
     * Stores the default style of extra-highlighted lines.
     *
     * @param string $styles The style for extra-highlighted lines
     * @since 1.0.2
     */
    public function set_highlight_lines_extra_style($styles) {
        $this->highlight_extra_lines_style = $styles;
    }

    /**
     * Stores the line ending; the value is cast to string.
     *
     * @param string $line_ending The new line-ending
     * @since 1.0.2
     */
    public function set_line_ending($line_ending) {
        $this->line_ending = (string) $line_ending;
    }

    /**
     * Sets what number line numbers should start at. Should
     * be a positive integer, and will be converted to one.
     *
     * <b>Warning:</b> Using this method will add the "start"
     * attribute to the &lt;ol&gt; that is used for line numbering.
     * This is <b>not</b> valid XHTML strict, so if that's what you
     * care about then don't use this method.
Firefox is getting
     * support for the CSS method of doing this in 1.1 and Opera
     * has support for the CSS method, but (of course) IE doesn't
     * so it's not worth doing it the CSS way yet.
     *
     * @param int $number The number to start line numbers at
     * @since 1.0.2
     */
    public function start_line_numbers_at($number) {
        // Convert to an integer first, then drop the sign
        $first_line = intval($number);
        $this->line_numbers_start = abs($first_line);
    }

    /**
     * Sets the encoding used for htmlspecialchars(), for international
     * support. The name is stored in lower case; an empty/falsy value
     * leaves the current encoding untouched.
     *
     * NOTE: This is not needed for now because htmlspecialchars() is not
     * being used (it has a security hole in PHP4 that has not been patched).
     * Maybe in a future version it may make a return for speed reasons, but
     * I doubt it.
     *
     * @param string $encoding The encoding to use for the source
     * @since 1.0.3
     */
    public function set_encoding($encoding) {
        if (!$encoding) {
            return;
        }

        $this->encoding = strtolower($encoding);
    }

    /**
     * Switches linking of keywords on or off.
     *
     * @param boolean $enable If true, links will be added to keywords
     * @since 1.0.2
     */
    public function enable_keyword_links($enable = true) {
        $this->keyword_links = ($enable) ? true : false;
    }

    /**
     * Setup caches needed for styling. This is automatically called in
     * parse_code() and get_stylesheet() when appropriate.
This function helps
     * stylesheet generators as they rely on some style information being
     * preprocessed
     *
     * @since 1.0.8
     */
    protected function build_style_cache() {
        //The cache is only needed when numbers are highlighted at all
        if (!$this->lexic_permissions['NUMBERS']) {
            return;
        }

        //A language without a NUMBERS entry is handled like one set to 0
        if (!isset($this->language_data['NUMBERS'])) {
            $this->language_data['NUMBERS'] = 0;
        }

        //An array is taken over as the cache unchanged
        if (is_array($this->language_data['NUMBERS'])) {
            $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
            return;
        }

        $this->language_data['NUMBERS_CACHE'] = array();

        //A falsy value selects basic integers plus non-scientific floats
        if (!$this->language_data['NUMBERS']) {
            $this->language_data['NUMBERS'] =
                GESHI_NUMBER_INT_BASIC |
                GESHI_NUMBER_FLT_NONSCI;
        }

        //Walk the bitmask from the lowest bit upwards until no set bit is left
        $bit = 0;
        $remaining = $this->language_data['NUMBERS'];
        while ($remaining > 0) {
            //Rearrange style indices if required: a style keyed by the bit
            //value (1 << $bit) is moved to the bit position ($bit)
            if (isset($this->language_data['STYLES']['NUMBERS'][1 << $bit])) {
                $this->language_data['STYLES']['NUMBERS'][$bit] =
                    $this->language_data['STYLES']['NUMBERS'][1 << $bit];
                unset($this->language_data['STYLES']['NUMBERS'][1 << $bit]);
            }

            //Check if this bit is set for highlighting
            if ($remaining & 1) {
                if (isset($this->language_data['STYLES']['NUMBERS'][$bit])) {
                    //The bit has a style group of its own
                    $this->language_data['NUMBERS_CACHE'][$bit] = 1 << $bit;
                } else {
                    //No own style: collect the bit in group 0
                    if (!isset($this->language_data['NUMBERS_CACHE'][0])) {
                        $this->language_data['NUMBERS_CACHE'][0] = 0;
                    }
                    $this->language_data['NUMBERS_CACHE'][0] |= 1 << $bit;
                }
            }

            ++$bit;
            $remaining >>= 1;
        }
    }

    /**
     * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
     * This function makes stylesheet generators much faster as they do not need these caches.
*
     * Fills $this->language_data with the symbol lookup table and search
     * expression (SYMBOL_DATA, SYMBOL_SEARCH), the keyword caches
     * (CACHED_KEYWORD_LISTS, via optimize_keyword_group()), the bracket
     * search/replace tables (CACHE_BRACKET_MATCH, CACHE_BRACKET_REPLACE) and
     * the number expressions (NUMBERS_RXCACHE), then marks the parse cache
     * as built.
     *
     * @return false|null false when no language data is loaded, nothing otherwise
     * @since 1.0.8
     */
    protected function build_parse_cache() {
        // check whether language_data is available
        if (empty($this->language_data)) {
            return false;
        }

        // cache symbol regexp
        //As this is a costly operation, we avoid doing it for multiple groups ...
        //Instead we perform it for all symbols at once.
        //
        //For this to work, we need to reorganize the data arrays.
        if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
            //count() on a missing style table is a TypeError on PHP 8, so
            //a language without symbol styles simply has no multiple groups
            $this->language_data['MULTIPLE_SYMBOL_GROUPS'] =
                isset($this->language_data['STYLES']['SYMBOLS']) &&
                count($this->language_data['STYLES']['SYMBOLS']) > 1;

            $this->language_data['SYMBOL_DATA'] = array();
            $symbol_preg_multi = array(); // multi char symbols
            $symbol_preg_single = array(); // single char symbols
            foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
                //Grouped symbols are styled by their group key,
                //a symbol given on its own belongs to style group 0
                if (is_array($symbols)) {
                    $style_key = $key;
                } else {
                    $style_key = 0;
                    $symbols = array($symbols);
                }

                foreach ($symbols as $sym) {
                    $sym = $this->hsc($sym);
                    if (isset($this->language_data['SYMBOL_DATA'][$sym])) {
                        // the first definition of a symbol wins
                        continue;
                    }
                    $this->language_data['SYMBOL_DATA'][$sym] = $style_key;

                    if (isset($sym[1])) { // multiple chars
                        $symbol_preg_multi[] = preg_quote($sym, '/');
                    } elseif ($sym == '-') {
                        // don't trigger range out of order error
                        $symbol_preg_single[] = '\-';
                    } else { // single char
                        $symbol_preg_single[] = preg_quote($sym, '/');
                    }
                }
            }

            //Now we have an array with each possible symbol as the key and the style as the actual data.
            //This way we can set the correct style just the moment we highlight ...
            //
            //Now we need to rewrite our arrays to get a single search expression:
            //multi char symbols become an alternation, single chars a character class.
            $symbol_preg = array();
            if (!empty($symbol_preg_multi)) {
                rsort($symbol_preg_multi);
                $symbol_preg[] = implode('|', $symbol_preg_multi);
            }
            if (!empty($symbol_preg_single)) {
                rsort($symbol_preg_single);
                $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
            }
            $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
        }

        // cache optimized regexp for keyword matching
        // remove old cache
        $this->language_data['CACHED_KEYWORD_LISTS'] = array();
        foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
            // groups without an explicit permission entry count as enabled
            if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
                    $this->lexic_permissions['KEYWORDS'][$key]) {
                $this->optimize_keyword_group($key);
            }
        }

        // brackets
        if ($this->lexic_permissions['BRACKETS']) {
            $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');

            // Inline style when classes are off and a bracket style exists,
            // the br0 class otherwise
            if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
                $bracket_open = '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">';
            } else {
                $bracket_open = '<| class="br0">';
            }

            // One replacement per bracket, in the same order as the match table
            $this->language_data['CACHE_BRACKET_REPLACE'] = array();
            foreach ($this->language_data['CACHE_BRACKET_MATCH'] as $bracket) {
                $this->language_data['CACHE_BRACKET_REPLACE'][] = $bracket_open . $bracket . '|>';
            }
        }

        //Build the parse cache needed to highlight numbers appropriate
        if ($this->lexic_permissions['NUMBERS']) {
            //Check if the style rearrangements have been processed ...
            //This also does some preprocessing to check which style groups are useable ...
            if (!isset($this->language_data['NUMBERS_CACHE'])) {
                $this->build_style_cache();
            }

            //Number format specification
            //All this formats are matched case-insensitively!
            static $numbers_format = array(
                GESHI_NUMBER_INT_BASIC =>
                    '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_INT_CSTYLE =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_BIN_SUFFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_BIN_PREFIX_0B =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_PREFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_PREFIX_0O =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_PREFIX_AT =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_SUFFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_HEX_PREFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_HEX_SUFFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_NONSCI =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_NONSCI_F =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_SCI_SHORT =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_SCI_ZERO =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
                );

            //At this step we have an associative array with flag groups for a
            //specific style or an string denoting a regexp given its index.
            $this->language_data['NUMBERS_RXCACHE'] = array();
            foreach ($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
                if (is_string($rxdata)) {
                    $regexp = $rxdata;
                } else {
                    //This is a bitfield of number flags to highlight:
                    //Build an array, implode them together and make this the actual RX
                    $rxuse = array();
                    //$i > 0 stops the walk should the shift ever overflow
                    for ($i = 1; $i > 0 && $i <= $rxdata; $i <<= 1) {
                        //Bits without a known format are skipped: an undefined
                        //entry would add an empty alternative that matches the
                        //empty string at every position
                        if (($rxdata & $i) && isset($numbers_format[$i])) {
                            $rxuse[] = $numbers_format[$i];
                        }
                    }

                    //Without any usable format use an expression that never
                    //matches, so every cache key still gets an entry
                    $regexp = empty($rxuse) ? '(?!)' : implode("|", $rxuse);
                }

                $this->language_data['NUMBERS_RXCACHE'][$key] =
                    "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i";
            }

            if (!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
                $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
            }
        }

        $this->parse_cache_built = true;
    }

    /**
     * Returns the code in $this->source, highlighted and surrounded by the
     * necessary HTML.
2152 * 2153 * This should only be called ONCE, cos it's SLOW! If you want to highlight 2154 * the same source multiple times, you're better off doing a whole lot of 2155 * str_replaces to replace the <span>s 2156 * 2157 * @since 1.0.0 2158 */ 2159 public function parse_code() { 2160 // Start the timer 2161 $start_time = microtime(); 2162 2163 // Replace all newlines to a common form. 2164 $code = str_replace("\r\n", "\n", $this->source); 2165 $code = str_replace("\r", "\n", $code); 2166 2167 // check whether language_data is available 2168 if (empty($this->language_data)) { 2169 $this->error = GESHI_ERROR_NO_SUCH_LANG; 2170 } 2171 2172 // Firstly, if there is an error, we won't highlight 2173 if ($this->error) { 2174 //Escape the source for output 2175 $result = $this->hsc($this->source); 2176 2177 //This fix is related to SF#1923020, but has to be applied regardless of 2178 //actually highlighting symbols. 2179 $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result); 2180 2181 // Timing is irrelevant 2182 $this->set_time($start_time, $start_time); 2183 $this->finalise($result); 2184 return $result; 2185 } 2186 2187 // make sure the parse cache is up2date 2188 if (!$this->parse_cache_built) { 2189 $this->build_parse_cache(); 2190 } 2191 2192 // Initialise various stuff 2193 $length = strlen($code); 2194 $COMMENT_MATCHED = false; 2195 $stuff_to_parse = ''; 2196 $endresult = ''; 2197 2198 // "Important" selections are handled like multiline comments 2199 // @todo GET RID OF THIS SHIZ 2200 if ($this->enable_important_blocks) { 2201 $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT; 2202 } 2203 2204 if ($this->strict_mode) { 2205 // Break the source into bits. 
Each bit will be a portion of the code 2206 // within script delimiters - for example, HTML between < and > 2207 $k = 0; 2208 $parts = array(); 2209 $matches = array(); 2210 $next_match_pointer = null; 2211 // we use a copy to unset delimiters on demand (when they are not found) 2212 $delim_copy = $this->language_data['SCRIPT_DELIMITERS']; 2213 $i = 0; 2214 while ($i < $length) { 2215 $next_match_pos = $length + 1; // never true 2216 foreach ($delim_copy as $dk => $delimiters) { 2217 if(is_array($delimiters)) { 2218 foreach ($delimiters as $open => $close) { 2219 // make sure the cache is setup properly 2220 if (!isset($matches[$dk][$open])) { 2221 $matches[$dk][$open] = array( 2222 'next_match' => -1, 2223 'dk' => $dk, 2224 2225 'open' => $open, // needed for grouping of adjacent code blocks (see below) 2226 'open_strlen' => strlen($open), 2227 2228 'close' => $close, 2229 'close_strlen' => strlen($close), 2230 ); 2231 } 2232 // Get the next little bit for this opening string 2233 if ($matches[$dk][$open]['next_match'] < $i) { 2234 // only find the next pos if it was not already cached 2235 $open_pos = strpos($code, $open, $i); 2236 if ($open_pos === false) { 2237 // no match for this delimiter ever 2238 unset($delim_copy[$dk][$open]); 2239 continue; 2240 } 2241 $matches[$dk][$open]['next_match'] = $open_pos; 2242 } 2243 if ($matches[$dk][$open]['next_match'] < $next_match_pos) { 2244 //So we got a new match, update the close_pos 2245 $matches[$dk][$open]['close_pos'] = 2246 strpos($code, $close, $matches[$dk][$open]['next_match']+1); 2247 2248 $next_match_pointer =& $matches[$dk][$open]; 2249 $next_match_pos = $matches[$dk][$open]['next_match']; 2250 } 2251 } 2252 } else { 2253 //So we should match an RegExp as Strict Block ... 
2254 /** 2255 * The value in $delimiters is expected to be an RegExp 2256 * containing exactly 2 matching groups: 2257 * - Group 1 is the opener 2258 * - Group 2 is the closer 2259 */ 2260 if(preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) { 2261 //We got a match ... 2262 if(isset($matches_rx['start']) && isset($matches_rx['end'])) 2263 { 2264 $matches[$dk] = array( 2265 'next_match' => $matches_rx['start'][1], 2266 'dk' => $dk, 2267 2268 'close_strlen' => strlen($matches_rx['end'][0]), 2269 'close_pos' => $matches_rx['end'][1], 2270 ); 2271 } else { 2272 $matches[$dk] = array( 2273 'next_match' => $matches_rx[1][1], 2274 'dk' => $dk, 2275 2276 'close_strlen' => strlen($matches_rx[2][0]), 2277 'close_pos' => $matches_rx[2][1], 2278 ); 2279 } 2280 } else { 2281 // no match for this delimiter ever 2282 unset($delim_copy[$dk]); 2283 continue; 2284 } 2285 2286 if ($matches[$dk]['next_match'] <= $next_match_pos) { 2287 $next_match_pointer =& $matches[$dk]; 2288 $next_match_pos = $matches[$dk]['next_match']; 2289 } 2290 } 2291 } 2292 2293 // non-highlightable text 2294 $parts[$k] = array( 2295 1 => substr($code, $i, $next_match_pos - $i) 2296 ); 2297 ++$k; 2298 2299 if ($next_match_pos > $length) { 2300 // out of bounds means no next match was found 2301 break; 2302 } 2303 2304 // highlightable code 2305 $parts[$k][0] = $next_match_pointer['dk']; 2306 2307 //Only combine for non-rx script blocks 2308 if(is_array($delim_copy[$next_match_pointer['dk']])) { 2309 // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three! 
2310 $i = $next_match_pos + $next_match_pointer['open_strlen']; 2311 while (true) { 2312 $close_pos = strpos($code, $next_match_pointer['close'], $i); 2313 if ($close_pos == false) { 2314 break; 2315 } 2316 $i = $close_pos + $next_match_pointer['close_strlen']; 2317 if ($i == $length) { 2318 break; 2319 } 2320 if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 || 2321 substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) { 2322 // merge adjacent but make sure we don't merge things like <tag><!-- comment --> 2323 foreach ($matches as $submatches) { 2324 foreach ($submatches as $match) { 2325 if ($match['next_match'] == $i) { 2326 // a different block already matches here! 2327 break 3; 2328 } 2329 } 2330 } 2331 } else { 2332 break; 2333 } 2334 } 2335 } else { 2336 $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen']; 2337 $i = $close_pos; 2338 } 2339 2340 if ($close_pos === false) { 2341 // no closing delimiter found! 2342 $parts[$k][1] = substr($code, $next_match_pos); 2343 ++$k; 2344 break; 2345 } else { 2346 $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos); 2347 ++$k; 2348 } 2349 } 2350 unset($delim_copy, $next_match_pointer, $next_match_pos, $matches); 2351 $num_parts = $k; 2352 2353 if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) { 2354 // when we have only one part, we don't have anything to highlight at all. 
2355 // if we have a "maybe" strict language, this should be handled as highlightable code 2356 $parts = array( 2357 0 => array( 2358 0 => '', 2359 1 => '' 2360 ), 2361 1 => array( 2362 0 => null, 2363 1 => $parts[0][1] 2364 ) 2365 ); 2366 $num_parts = 2; 2367 } 2368 2369 } else { 2370 // Not strict mode - simply dump the source into 2371 // the array at index 1 (the first highlightable block) 2372 $parts = array( 2373 0 => array( 2374 0 => '', 2375 1 => '' 2376 ), 2377 1 => array( 2378 0 => null, 2379 1 => $code 2380 ) 2381 ); 2382 $num_parts = 2; 2383 } 2384 2385 //Unset variables we won't need any longer 2386 unset($code); 2387 2388 //Preload some repeatedly used values regarding hardquotes ... 2389 $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false; 2390 $hq_strlen = strlen($hq); 2391 2392 //Preload if line numbers are to be generated afterwards 2393 //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398 2394 $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS || 2395 !empty($this->highlight_extra_lines) || !$this->allow_multiline_span; 2396 2397 //preload the escape char for faster checking ... 
2398 $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']); 2399 2400 // this is used for single-line comments 2401 $sc_disallowed_before = ""; 2402 $sc_disallowed_after = ""; 2403 2404 if (isset($this->language_data['PARSER_CONTROL'])) { 2405 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) { 2406 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) { 2407 $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE']; 2408 } 2409 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) { 2410 $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER']; 2411 } 2412 } 2413 } 2414 2415 //Fix for SF#1932083: Multichar Quotemarks unsupported 2416 $is_string_starter = array(); 2417 if ($this->lexic_permissions['STRINGS']) { 2418 foreach ($this->language_data['QUOTEMARKS'] as $quotemark) { 2419 if (!isset($is_string_starter[$quotemark[0]])) { 2420 $is_string_starter[$quotemark[0]] = (string)$quotemark; 2421 } elseif (is_string($is_string_starter[$quotemark[0]])) { 2422 $is_string_starter[$quotemark[0]] = array( 2423 $is_string_starter[$quotemark[0]], 2424 $quotemark); 2425 } else { 2426 $is_string_starter[$quotemark[0]][] = $quotemark; 2427 } 2428 } 2429 } 2430 2431 // Now we go through each part. We know that even-indexed parts are 2432 // code that shouldn't be highlighted, and odd-indexed parts should 2433 // be highlighted 2434 for ($key = 0; $key < $num_parts; ++$key) { 2435 $STRICTATTRS = ''; 2436 2437 // If this block should be highlighted... 
2438 if (!($key & 1)) { 2439 // Else not a block to highlight 2440 $endresult .= $this->hsc($parts[$key][1]); 2441 unset($parts[$key]); 2442 continue; 2443 } 2444 2445 $result = ''; 2446 $part = $parts[$key][1]; 2447 2448 $highlight_part = true; 2449 if ($this->strict_mode && !is_null($parts[$key][0])) { 2450 // get the class key for this block of code 2451 $script_key = $parts[$key][0]; 2452 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key]; 2453 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' && 2454 $this->lexic_permissions['SCRIPT']) { 2455 // Add a span element around the source to 2456 // highlight the overall source block 2457 if (!$this->use_classes && 2458 $this->language_data['STYLES']['SCRIPT'][$script_key] != '') { 2459 $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"'; 2460 } else { 2461 $attributes = ' class="sc' . $script_key . '"'; 2462 } 2463 $result .= "<span$attributes>"; 2464 $STRICTATTRS = $attributes; 2465 } 2466 } 2467 2468 if ($highlight_part) { 2469 // Now, highlight the code in this block. This code 2470 // is really the engine of GeSHi (along with the method 2471 // parse_non_string_part). 
2472 2473 // cache comment regexps incrementally 2474 $next_comment_regexp_key = ''; 2475 $next_comment_regexp_pos = -1; 2476 $next_comment_multi_pos = -1; 2477 $next_comment_single_pos = -1; 2478 $comment_regexp_cache_per_key = array(); 2479 $comment_multi_cache_per_key = array(); 2480 $comment_single_cache_per_key = array(); 2481 $next_open_comment_multi = ''; 2482 $next_comment_single_key = ''; 2483 $escape_regexp_cache_per_key = array(); 2484 $next_escape_regexp_key = ''; 2485 $next_escape_regexp_pos = -1; 2486 2487 $length = strlen($part); 2488 for ($i = 0; $i < $length; ++$i) { 2489 // Get the next char 2490 $char = $part[$i]; 2491 $char_len = 1; 2492 2493 // update regexp comment cache if needed 2494 if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) { 2495 $next_comment_regexp_pos = $length; 2496 foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) { 2497 $match_i = false; 2498 if (isset($comment_regexp_cache_per_key[$comment_key]) && 2499 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i || 2500 $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) { 2501 // we have already matched something 2502 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) { 2503 // this comment is never matched 2504 continue; 2505 } 2506 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos']; 2507 } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i)) { 2508 $match_i = $match[0][1]; 2509 2510 $comment_regexp_cache_per_key[$comment_key] = array( 2511 'key' => $comment_key, 2512 'length' => strlen($match[0][0]), 2513 'pos' => $match_i 2514 ); 2515 } else { 2516 $comment_regexp_cache_per_key[$comment_key]['pos'] = false; 2517 continue; 2518 } 2519 2520 if ($match_i !== false && $match_i < $next_comment_regexp_pos) { 2521 $next_comment_regexp_pos = $match_i; 2522 $next_comment_regexp_key = $comment_key; 2523 if ($match_i === $i) { 2524 break; 2525 } 2526 } 2527 } 2528 } 2529 
2530 $string_started = false; 2531 2532 if (isset($is_string_starter[$char])) { 2533 // Possibly the start of a new string ... 2534 2535 //Check which starter it was ... 2536 //Fix for SF#1932083: Multichar Quotemarks unsupported 2537 if (is_array($is_string_starter[$char])) { 2538 $char_new = ''; 2539 foreach ($is_string_starter[$char] as $testchar) { 2540 if ($testchar === substr($part, $i, strlen($testchar)) && 2541 strlen($testchar) > strlen($char_new)) { 2542 $char_new = $testchar; 2543 $string_started = true; 2544 } 2545 } 2546 if ($string_started) { 2547 $char = $char_new; 2548 } 2549 } else { 2550 $testchar = $is_string_starter[$char]; 2551 if ($testchar === substr($part, $i, strlen($testchar))) { 2552 $char = $testchar; 2553 $string_started = true; 2554 } 2555 } 2556 $char_len = strlen($char); 2557 } 2558 2559 if ($string_started && ($i != $next_comment_regexp_pos)) { 2560 // Hand out the correct style information for this string 2561 $string_key = array_search($char, $this->language_data['QUOTEMARKS']); 2562 if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) || 2563 !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) { 2564 $string_key = 0; 2565 } 2566 2567 // parse the stuff before this 2568 $result .= $this->parse_non_string_part($stuff_to_parse); 2569 $stuff_to_parse = ''; 2570 2571 if (!$this->use_classes) { 2572 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"'; 2573 } else { 2574 $string_attributes = ' class="st'.$string_key.'"'; 2575 } 2576 2577 // now handle the string 2578 $string = "<span$string_attributes>" . GeSHi::hsc($char); 2579 $start = $i + $char_len; 2580 $string_open = true; 2581 2582 if(empty($this->language_data['ESCAPE_REGEXP'])) { 2583 $next_escape_regexp_pos = $length; 2584 } 2585 2586 do { 2587 //Get the regular ending pos ... 
2588 $close_pos = strpos($part, $char, $start); 2589 if(false === $close_pos) { 2590 $close_pos = $length; 2591 } 2592 2593 if($this->lexic_permissions['ESCAPE_CHAR']) { 2594 // update escape regexp cache if needed 2595 if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) { 2596 $next_escape_regexp_pos = $length; 2597 foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) { 2598 $match_i = false; 2599 if (isset($escape_regexp_cache_per_key[$escape_key]) && 2600 ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start || 2601 $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) { 2602 // we have already matched something 2603 if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) { 2604 // this comment is never matched 2605 continue; 2606 } 2607 $match_i = $escape_regexp_cache_per_key[$escape_key]['pos']; 2608 } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start)) { 2609 $match_i = $match[0][1]; 2610 2611 $escape_regexp_cache_per_key[$escape_key] = array( 2612 'key' => $escape_key, 2613 'length' => strlen($match[0][0]), 2614 'pos' => $match_i 2615 ); 2616 } else { 2617 $escape_regexp_cache_per_key[$escape_key]['pos'] = false; 2618 continue; 2619 } 2620 2621 if ($match_i !== false && $match_i < $next_escape_regexp_pos) { 2622 $next_escape_regexp_pos = $match_i; 2623 $next_escape_regexp_key = $escape_key; 2624 if ($match_i === $start) { 2625 break; 2626 } 2627 } 2628 } 2629 } 2630 2631 //Find the next simple escape position 2632 if('' != $this->language_data['ESCAPE_CHAR']) { 2633 $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start); 2634 if(false === $simple_escape) { 2635 $simple_escape = $length; 2636 } 2637 } else { 2638 $simple_escape = $length; 2639 } 2640 } else { 2641 $next_escape_regexp_pos = $length; 2642 $simple_escape = $length; 2643 } 2644 2645 if($simple_escape < $next_escape_regexp_pos && 2646 $simple_escape < $length && 2647 $simple_escape 
< $close_pos) { 2648 //The nexxt escape sequence is a simple one ... 2649 $es_pos = $simple_escape; 2650 2651 //Add the stuff not in the string yet ... 2652 $string .= $this->hsc(substr($part, $start, $es_pos - $start)); 2653 2654 //Get the style for this escaped char ... 2655 if (!$this->use_classes) { 2656 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"'; 2657 } else { 2658 $escape_char_attributes = ' class="es0"'; 2659 } 2660 2661 //Add the style for the escape char ... 2662 $string .= "<span$escape_char_attributes>" . 2663 GeSHi::hsc($this->language_data['ESCAPE_CHAR']); 2664 2665 //Get the byte AFTER the ESCAPE_CHAR we just found 2666 $es_char = $part[$es_pos + 1]; 2667 if ($es_char == "\n") { 2668 // don't put a newline around newlines 2669 $string .= "</span>\n"; 2670 $start = $es_pos + 2; 2671 } elseif (ord($es_char) >= 128) { 2672 //This is an non-ASCII char (UTF8 or single byte) 2673 //This code tries to work around SF#2037598 ... 2674 if(function_exists('mb_substr')) { 2675 $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding); 2676 $string .= $es_char_m . '</span>'; 2677 } elseif ('utf-8' == $this->encoding) { 2678 if(preg_match("/[\xC2-\xDF][\x80-\xBF]". 2679 "|\xE0[\xA0-\xBF][\x80-\xBF]". 2680 "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}". 2681 "|\xED[\x80-\x9F][\x80-\xBF]". 2682 "|\xF0[\x90-\xBF][\x80-\xBF]{2}". 2683 "|[\xF1-\xF3][\x80-\xBF]{3}". 2684 "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s", 2685 $part, $es_char_m, null, $es_pos + 1)) { 2686 $es_char_m = $es_char_m[0]; 2687 } else { 2688 $es_char_m = $es_char; 2689 } 2690 $string .= $this->hsc($es_char_m) . '</span>'; 2691 } else { 2692 $es_char_m = $this->hsc($es_char); 2693 } 2694 $start = $es_pos + strlen($es_char_m) + 1; 2695 } else { 2696 $string .= $this->hsc($es_char) . 
'</span>'; 2697 $start = $es_pos + 2; 2698 } 2699 } elseif ($next_escape_regexp_pos < $length && 2700 $next_escape_regexp_pos < $close_pos) { 2701 $es_pos = $next_escape_regexp_pos; 2702 //Add the stuff not in the string yet ... 2703 $string .= $this->hsc(substr($part, $start, $es_pos - $start)); 2704 2705 //Get the key and length of this match ... 2706 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key]; 2707 $escape_str = substr($part, $es_pos, $escape['length']); 2708 $escape_key = $escape['key']; 2709 2710 //Get the style for this escaped char ... 2711 if (!$this->use_classes) { 2712 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"'; 2713 } else { 2714 $escape_char_attributes = ' class="es' . $escape_key . '"'; 2715 } 2716 2717 //Add the style for the escape char ... 2718 $string .= "<span$escape_char_attributes>" . 2719 $this->hsc($escape_str) . '</span>'; 2720 2721 $start = $es_pos + $escape['length']; 2722 } else { 2723 //Copy the remainder of the string ... 2724 $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>'; 2725 $start = $close_pos + $char_len; 2726 $string_open = false; 2727 } 2728 } while($string_open); 2729 2730 if ($check_linenumbers) { 2731 // Are line numbers used? If, we should end the string before 2732 // the newline and begin it again (so when <li>s are put in the source 2733 // remains XHTML compliant) 2734 // note to self: This opens up possibility of config files specifying 2735 // that languages can/cannot have multiline strings??? 
2736 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string); 2737 } 2738 2739 $result .= $string; 2740 $string = ''; 2741 $i = $start - 1; 2742 continue; 2743 } elseif ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char && 2744 substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) { 2745 // The start of a hard quoted string 2746 if (!$this->use_classes) { 2747 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"'; 2748 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"'; 2749 } else { 2750 $string_attributes = ' class="st_h"'; 2751 $escape_char_attributes = ' class="es_h"'; 2752 } 2753 // parse the stuff before this 2754 $result .= $this->parse_non_string_part($stuff_to_parse); 2755 $stuff_to_parse = ''; 2756 2757 // now handle the string 2758 $string = ''; 2759 2760 // look for closing quote 2761 $start = $i + $hq_strlen; 2762 while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) { 2763 $start = $close_pos + 1; 2764 if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] && 2765 (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape 2766 // make sure this quote is not escaped 2767 foreach ($this->language_data['HARDESCAPE'] as $hardescape) { 2768 if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) { 2769 // check wether this quote is escaped or if it is something like '\\' 2770 $escape_char_pos = $close_pos - 1; 2771 while ($escape_char_pos > 0 2772 && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) { 2773 --$escape_char_pos; 2774 } 2775 if (($close_pos - $escape_char_pos) & 1) { 2776 // uneven number of escape chars => this quote is escaped 2777 continue 2; 2778 } 2779 } 2780 } 2781 } 2782 2783 // found closing quote 2784 break; 2785 } 2786 2787 //Found the closing delimiter? 
2788 if (!$close_pos) { 2789 // span till the end of this $part when no closing delimiter is found 2790 $close_pos = $length; 2791 } 2792 2793 //Get the actual string 2794 $string = substr($part, $i, $close_pos - $i + 1); 2795 $i = $close_pos; 2796 2797 // handle escape chars and encode html chars 2798 // (special because when we have escape chars within our string they may not be escaped) 2799 if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) { 2800 $start = 0; 2801 $new_string = ''; 2802 while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) { 2803 // hmtl escape stuff before 2804 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start)); 2805 // check if this is a hard escape 2806 foreach ($this->language_data['HARDESCAPE'] as $hardescape) { 2807 if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) { 2808 // indeed, this is a hardescape 2809 $new_string .= "<span$escape_char_attributes>" . 2810 $this->hsc($hardescape) . '</span>'; 2811 $start = $es_pos + strlen($hardescape); 2812 continue 2; 2813 } 2814 } 2815 // not a hard escape, but a normal escape 2816 // they come in pairs of two 2817 $c = 0; 2818 while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1]) 2819 && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR'] 2820 && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) { 2821 $c += 2; 2822 } 2823 if ($c) { 2824 $new_string .= "<span$escape_char_attributes>" . 2825 str_repeat($escaped_escape_char, $c) . 2826 '</span>'; 2827 $start = $es_pos + $c; 2828 } else { 2829 // this is just a single lonely escape char... 2830 $new_string .= $escaped_escape_char; 2831 $start = $es_pos + 1; 2832 } 2833 } 2834 $string = $new_string . $this->hsc(substr($string, $start)); 2835 } else { 2836 $string = $this->hsc($string); 2837 } 2838 2839 if ($check_linenumbers) { 2840 // Are line numbers used? 
If, we should end the string before 2841 // the newline and begin it again (so when <li>s are put in the source 2842 // remains XHTML compliant) 2843 // note to self: This opens up possibility of config files specifying 2844 // that languages can/cannot have multiline strings??? 2845 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string); 2846 } 2847 2848 $result .= "<span$string_attributes>" . $string . '</span>'; 2849 $string = ''; 2850 continue; 2851 } else { 2852 //Have a look for regexp comments 2853 if ($i == $next_comment_regexp_pos) { 2854 $COMMENT_MATCHED = true; 2855 $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key]; 2856 $test_str = $this->hsc(substr($part, $i, $comment['length'])); 2857 2858 //@todo If remove important do remove here 2859 if ($this->lexic_permissions['COMMENTS']['MULTI']) { 2860 if (!$this->use_classes) { 2861 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"'; 2862 } else { 2863 $attributes = ' class="co' . $comment['key'] . '"'; 2864 } 2865 2866 $test_str = "<span$attributes>" . $test_str . "</span>"; 2867 2868 // Short-cut through all the multiline code 2869 if ($check_linenumbers) { 2870 // strreplace to put close span and open span around multiline newlines 2871 $test_str = str_replace( 2872 "\n", "</span>\n<span$attributes>", 2873 str_replace("\n ", "\n ", $test_str) 2874 ); 2875 } 2876 } 2877 2878 $i += $comment['length'] - 1; 2879 2880 // parse the rest 2881 $result .= $this->parse_non_string_part($stuff_to_parse); 2882 $stuff_to_parse = ''; 2883 } 2884 2885 // If we haven't matched a regexp comment, try multi-line comments 2886 if (!$COMMENT_MATCHED) { 2887 // Is this a multiline comment? 
2888 if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) { 2889 $next_comment_multi_pos = $length; 2890 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) { 2891 $match_i = false; 2892 if (isset($comment_multi_cache_per_key[$open]) && 2893 ($comment_multi_cache_per_key[$open] >= $i || 2894 $comment_multi_cache_per_key[$open] === false)) { 2895 // we have already matched something 2896 if ($comment_multi_cache_per_key[$open] === false) { 2897 // this comment is never matched 2898 continue; 2899 } 2900 $match_i = $comment_multi_cache_per_key[$open]; 2901 } elseif (($match_i = stripos($part, $open, $i)) !== false) { 2902 $comment_multi_cache_per_key[$open] = $match_i; 2903 } else { 2904 $comment_multi_cache_per_key[$open] = false; 2905 continue; 2906 } 2907 if ($match_i !== false && $match_i < $next_comment_multi_pos) { 2908 $next_comment_multi_pos = $match_i; 2909 $next_open_comment_multi = $open; 2910 if ($match_i === $i) { 2911 break; 2912 } 2913 } 2914 } 2915 } 2916 if ($i == $next_comment_multi_pos) { 2917 $open = $next_open_comment_multi; 2918 $close = $this->language_data['COMMENT_MULTI'][$open]; 2919 $open_strlen = strlen($open); 2920 $close_strlen = strlen($close); 2921 $COMMENT_MATCHED = true; 2922 $test_str_match = $open; 2923 //@todo If remove important do remove here 2924 if ($this->lexic_permissions['COMMENTS']['MULTI'] || 2925 $open == GESHI_START_IMPORTANT) { 2926 if ($open != GESHI_START_IMPORTANT) { 2927 if (!$this->use_classes) { 2928 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"'; 2929 } else { 2930 $attributes = ' class="coMULTI"'; 2931 } 2932 $test_str = "<span$attributes>" . $this->hsc($open); 2933 } else { 2934 if (!$this->use_classes) { 2935 $attributes = ' style="' . $this->important_styles . 
'"'; 2936 } else { 2937 $attributes = ' class="imp"'; 2938 } 2939 2940 // We don't include the start of the comment if it's an 2941 // "important" part 2942 $test_str = "<span$attributes>"; 2943 } 2944 } else { 2945 $test_str = $this->hsc($open); 2946 } 2947 2948 $close_pos = strpos( $part, $close, $i + $open_strlen ); 2949 2950 if ($close_pos === false) { 2951 $close_pos = $length; 2952 } 2953 2954 // Short-cut through all the multiline code 2955 $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen)); 2956 if (($this->lexic_permissions['COMMENTS']['MULTI'] || 2957 $test_str_match == GESHI_START_IMPORTANT) && 2958 $check_linenumbers) { 2959 2960 // strreplace to put close span and open span around multiline newlines 2961 $test_str .= str_replace( 2962 "\n", "</span>\n<span$attributes>", 2963 str_replace("\n ", "\n ", $rest_of_comment) 2964 ); 2965 } else { 2966 $test_str .= $rest_of_comment; 2967 } 2968 2969 if ($this->lexic_permissions['COMMENTS']['MULTI'] || 2970 $test_str_match == GESHI_START_IMPORTANT) { 2971 $test_str .= '</span>'; 2972 } 2973 2974 $i = $close_pos + $close_strlen - 1; 2975 2976 // parse the rest 2977 $result .= $this->parse_non_string_part($stuff_to_parse); 2978 $stuff_to_parse = ''; 2979 } 2980 } 2981 2982 // If we haven't matched a multiline comment, try single-line comments 2983 if (!$COMMENT_MATCHED) { 2984 // cache potential single line comment occurances 2985 if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) { 2986 $next_comment_single_pos = $length; 2987 foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) { 2988 $match_i = false; 2989 if (isset($comment_single_cache_per_key[$comment_key]) && 2990 ($comment_single_cache_per_key[$comment_key] >= $i || 2991 $comment_single_cache_per_key[$comment_key] === false)) { 2992 // we have already matched something 2993 if ($comment_single_cache_per_key[$comment_key] === false) { 
2994 // this comment is never matched 2995 continue; 2996 } 2997 $match_i = $comment_single_cache_per_key[$comment_key]; 2998 } elseif ( 2999 // case sensitive comments 3000 ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] && 3001 ($match_i = stripos($part, $comment_mark, $i)) !== false) || 3002 // non case sensitive 3003 (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] && 3004 (($match_i = strpos($part, $comment_mark, $i)) !== false))) { 3005 $comment_single_cache_per_key[$comment_key] = $match_i; 3006 } else { 3007 $comment_single_cache_per_key[$comment_key] = false; 3008 continue; 3009 } 3010 if ($match_i !== false && $match_i < $next_comment_single_pos) { 3011 $next_comment_single_pos = $match_i; 3012 $next_comment_single_key = $comment_key; 3013 if ($match_i === $i) { 3014 break; 3015 } 3016 } 3017 } 3018 } 3019 if ($next_comment_single_pos == $i) { 3020 $comment_key = $next_comment_single_key; 3021 $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key]; 3022 $com_len = strlen($comment_mark); 3023 3024 // This check will find special variables like $# in bash 3025 // or compiler directives of Delphi beginning {$ 3026 if ((empty($sc_disallowed_before) || ($i == 0) || 3027 (false === strpos($sc_disallowed_before, $part[$i-1]))) && 3028 (empty($sc_disallowed_after) || ($length <= $i + $com_len) || 3029 (false === strpos($sc_disallowed_after, $part[$i + $com_len])))) 3030 { 3031 // this is a valid comment 3032 $COMMENT_MATCHED = true; 3033 if ($this->lexic_permissions['COMMENTS'][$comment_key]) { 3034 if (!$this->use_classes) { 3035 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"'; 3036 } else { 3037 $attributes = ' class="co' . $comment_key . '"'; 3038 } 3039 $test_str = "<span$attributes>" . 
$this->hsc($this->change_case($comment_mark)); 3040 } else { 3041 $test_str = $this->hsc($comment_mark); 3042 } 3043 3044 //Check if this comment is the last in the source 3045 $close_pos = strpos($part, "\n", $i); 3046 $oops = false; 3047 if ($close_pos === false) { 3048 $close_pos = $length; 3049 $oops = true; 3050 } 3051 $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len)); 3052 if ($this->lexic_permissions['COMMENTS'][$comment_key]) { 3053 $test_str .= "</span>"; 3054 } 3055 3056 // Take into account that the comment might be the last in the source 3057 if (!$oops) { 3058 $test_str .= "\n"; 3059 } 3060 3061 $i = $close_pos; 3062 3063 // parse the rest 3064 $result .= $this->parse_non_string_part($stuff_to_parse); 3065 $stuff_to_parse = ''; 3066 } 3067 } 3068 } 3069 } 3070 3071 // Where are we adding this char? 3072 if (!$COMMENT_MATCHED) { 3073 $stuff_to_parse .= $char; 3074 } else { 3075 $result .= $test_str; 3076 unset($test_str); 3077 $COMMENT_MATCHED = false; 3078 } 3079 } 3080 // Parse the last bit 3081 $result .= $this->parse_non_string_part($stuff_to_parse); 3082 $stuff_to_parse = ''; 3083 } else { 3084 $result .= $this->hsc($part); 3085 } 3086 // Close the <span> that surrounds the block 3087 if ($STRICTATTRS != '') { 3088 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result); 3089 $result .= '</span>'; 3090 } 3091 3092 $endresult .= $result; 3093 unset($part, $parts[$key], $result); 3094 } 3095 3096 //This fix is related to SF#1923020, but has to be applied regardless of 3097 //actually highlighting symbols. 3098 /** NOTE: memorypeak #3 */ 3099 $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult); 3100 3101// // Parse the last stuff (redundant?) 
3102// $result .= $this->parse_non_string_part($stuff_to_parse); 3103 3104 // Lop off the very first and last spaces 3105// $result = substr($result, 1, -1); 3106 3107 // We're finished: stop timing 3108 $this->set_time($start_time, microtime()); 3109 3110 $this->finalise($endresult); 3111 return $endresult; 3112 } 3113 3114 /** 3115 * Swaps out spaces and tabs for HTML indentation. Not needed if 3116 * the code is in a pre block... 3117 * 3118 * @param string $result The source to indent (reference!) 3119 * @since 1.0.0 3120 */ 3121 protected function indent(&$result) { 3122 /// Replace tabs with the correct number of spaces 3123 if (false !== strpos($result, "\t")) { 3124 $lines = explode("\n", $result); 3125 $result = null;//Save memory while we process the lines individually 3126 $tab_width = $this->get_real_tab_width(); 3127 $tab_string = ' ' . str_repeat(' ', $tab_width); 3128 3129 for ($key = 0, $n = count($lines); $key < $n; $key++) { 3130 $line = $lines[$key]; 3131 if (false === strpos($line, "\t")) { 3132 continue; 3133 } 3134 3135 $pos = 0; 3136 $length = strlen($line); 3137 $lines[$key] = ''; // reduce memory 3138 3139 $IN_TAG = false; 3140 for ($i = 0; $i < $length; ++$i) { 3141 $char = $line[$i]; 3142 // Simple engine to work out whether we're in a tag. 3143 // If we are we modify $pos. This is so we ignore HTML 3144 // in the line and only workout the tab replacement 3145 // via the actual content of the string 3146 // This test could be improved to include strings in the 3147 // html so that < or > would be allowed in user's styles 3148 // (e.g. 
quotes: '<' '>'; or similar) 3149 if ($IN_TAG) { 3150 if ('>' == $char) { 3151 $IN_TAG = false; 3152 } 3153 $lines[$key] .= $char; 3154 } elseif ('<' == $char) { 3155 $IN_TAG = true; 3156 $lines[$key] .= '<'; 3157 } elseif ('&' == $char) { 3158 $substr = substr($line, $i + 3, 5); 3159 $posi = strpos($substr, ';'); 3160 if (false === $posi) { 3161 ++$pos; 3162 } else { 3163 $pos -= $posi+2; 3164 } 3165 $lines[$key] .= $char; 3166 } elseif ("\t" == $char) { 3167 $str = ''; 3168 // OPTIMISE - move $strs out. Make an array: 3169 // $tabs = array( 3170 // 1 => ' ', 3171 // 2 => ' ', 3172 // 3 => ' ' etc etc 3173 // to use instead of building a string every time 3174 $tab_end_width = $tab_width - ($pos % $tab_width); //Moved out of the look as it doesn't change within the loop 3175 if (($pos & 1) || 1 == $tab_end_width) { 3176 $str .= substr($tab_string, 6, $tab_end_width); 3177 } else { 3178 $str .= substr($tab_string, 0, $tab_end_width+5); 3179 } 3180 $lines[$key] .= $str; 3181 $pos += $tab_end_width; 3182 3183 if (false === strpos($line, "\t", $i + 1)) { 3184 $lines[$key] .= substr($line, $i + 1); 3185 break; 3186 } 3187 } elseif (0 == $pos && ' ' == $char) { 3188 $lines[$key] .= ' '; 3189 ++$pos; 3190 } else { 3191 $lines[$key] .= $char; 3192 ++$pos; 3193 } 3194 } 3195 } 3196 $result = implode("\n", $lines); 3197 unset($lines);//We don't need the lines separated beyond this --- free them! 
3198 } 3199 // Other whitespace 3200 // BenBE: Fix to reduce the number of replacements to be done 3201 $result = preg_replace('/^ /m', ' ', $result); 3202 $result = str_replace(' ', ' ', $result); 3203 3204 if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) { 3205 if ($this->line_ending === null) { 3206 $result = nl2br($result); 3207 } else { 3208 $result = str_replace("\n", $this->line_ending, $result); 3209 } 3210 } 3211 } 3212 3213 /** 3214 * Changes the case of a keyword for those languages where a change is asked for 3215 * 3216 * @param string $instr The keyword to change the case of 3217 * @return string The keyword with its case changed 3218 * @since 1.0.0 3219 */ 3220 protected function change_case($instr) { 3221 switch ($this->language_data['CASE_KEYWORDS']) { 3222 case GESHI_CAPS_UPPER: 3223 return strtoupper($instr); 3224 case GESHI_CAPS_LOWER: 3225 return strtolower($instr); 3226 default: 3227 return $instr; 3228 } 3229 } 3230 3231 /** 3232 * Handles replacements of keywords to include markup and links if requested 3233 * 3234 * @param string $match The keyword to add the Markup to 3235 * @return string The HTML for the match found 3236 * @since 1.0.8 3237 * 3238 * @todo Get rid of ender in keyword links 3239 */ 3240 protected function handle_keyword_replace($match) { 3241 $k = $this->_kw_replace_group; 3242 $keyword = $match[0]; 3243 $keyword_match = $match[1]; 3244 3245 $before = ''; 3246 $after = ''; 3247 3248 if ($this->keyword_links) { 3249 // Keyword links have been ebabled 3250 3251 if (isset($this->language_data['URLS'][$k]) && 3252 $this->language_data['URLS'][$k] != '') { 3253 // There is a base group for this keyword 3254 3255 // Old system: strtolower 3256 //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? 
$keyword : strtolower($keyword); 3257 // New system: get keyword from language file to get correct case 3258 if (!$this->language_data['CASE_SENSITIVE'][$k] && 3259 strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) { 3260 foreach ($this->language_data['KEYWORDS'][$k] as $word) { 3261 if (strcasecmp($word, $keyword_match) == 0) { 3262 break; 3263 } 3264 } 3265 } else { 3266 $word = $keyword_match; 3267 } 3268 3269 $before = '<|UR1|"' . 3270 str_replace( 3271 array( 3272 '{FNAME}', 3273 '{FNAMEL}', 3274 '{FNAMEU}', 3275 '{FNAMEUF}', 3276 '.'), 3277 array( 3278 str_replace('+', '%20', urlencode($this->hsc($word))), 3279 str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))), 3280 str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))), 3281 str_replace('+', '%20', urlencode($this->hsc(ucfirst($word)))), 3282 '<DOT>'), 3283 $this->language_data['URLS'][$k] 3284 ) . '">'; 3285 $after = '</a>'; 3286 } 3287 } 3288 3289 return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after; 3290 } 3291 3292 /** 3293 * handles regular expressions highlighting-definitions with callback functions 3294 * 3295 * @note this is a callback, don't use it directly 3296 * 3297 * @param array $matches the matches array 3298 * @return string The highlighted string 3299 * @since 1.0.8 3300 */ 3301 protected function handle_regexps_callback($matches) { 3302 // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'", 3303 return ' style="' . call_user_func($this->language_data['STYLES']['REGEXPS'][$this->_rx_key], $matches[1]) . '"'. $matches[1] . '|>'; 3304 } 3305 3306 /** 3307 * handles newlines in REGEXPS matches. 
Set the _hmr_* vars before calling this 3308 * 3309 * @note this is a callback, don't use it directly 3310 * 3311 * @param array $matches the matches array 3312 * @return string 3313 * @since 1.0.8 3314 */ 3315 protected function handle_multiline_regexps($matches) { 3316 $before = $this->_hmr_before; 3317 $after = $this->_hmr_after; 3318 if ($this->_hmr_replace) { 3319 $replace = $this->_hmr_replace; 3320 $search = array(); 3321 3322 foreach (array_keys($matches) as $k) { 3323 $search[] = '\\' . $k; 3324 } 3325 3326 $before = str_replace($search, $matches, $before); 3327 $after = str_replace($search, $matches, $after); 3328 $replace = str_replace($search, $matches, $replace); 3329 } else { 3330 $replace = $matches[0]; 3331 } 3332 return $before 3333 . '<|!REG3XP' . $this->_hmr_key .'!>' 3334 . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace) 3335 . '|>' 3336 . $after; 3337 } 3338 3339 /** 3340 * Takes a string that has no strings or comments in it, and highlights 3341 * stuff like keywords, numbers and methods. 3342 * 3343 * @param string $stuff_to_parse The string to parse for keyword, numbers etc. 3344 * @since 1.0.0 3345 * @todo BUGGY! Why? Why not build string and return? 3346 * @return string 3347 */ 3348 protected function parse_non_string_part($stuff_to_parse) { 3349 $stuff_to_parse = ' ' . 
$this->hsc($stuff_to_parse); 3350 3351 // Highlight keywords 3352 $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#|^&"; 3353 $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;"; 3354 if ($this->lexic_permissions['STRINGS']) { 3355 $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/'); 3356 $disallowed_before .= $quotemarks; 3357 $disallowed_after .= $quotemarks; 3358 } 3359 $disallowed_before .= "])"; 3360 $disallowed_after .= "])"; 3361 3362 $parser_control_pergroup = false; 3363 if (isset($this->language_data['PARSER_CONTROL'])) { 3364 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) { 3365 $x = 0; // check wether per-keyword-group parser_control is enabled 3366 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) { 3367 $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE']; 3368 ++$x; 3369 } 3370 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) { 3371 $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER']; 3372 ++$x; 3373 } 3374 $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0; 3375 } 3376 } 3377 3378 foreach (array_keys($this->language_data['KEYWORDS']) as $k) { 3379 if (!isset($this->lexic_permissions['KEYWORDS'][$k]) || 3380 $this->lexic_permissions['KEYWORDS'][$k]) { 3381 3382 $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k]; 3383 $modifiers = $case_sensitive ? 
'' : 'i'; 3384 3385 // NEW in 1.0.8 - per-keyword-group parser control 3386 $disallowed_before_local = $disallowed_before; 3387 $disallowed_after_local = $disallowed_after; 3388 if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) { 3389 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) { 3390 $disallowed_before_local = 3391 $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE']; 3392 } 3393 3394 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) { 3395 $disallowed_after_local = 3396 $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER']; 3397 } 3398 } 3399 3400 $this->_kw_replace_group = $k; 3401 3402 //NEW in 1.0.8, the cached regexp list 3403 // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks 3404 for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set < $set_length; ++$set) { 3405 $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set]; 3406 // Might make a more unique string for putting the number in soon 3407 // Basically, we don't put the styles in yet because then the styles themselves will 3408 // get highlighted if the language has a CSS keyword in it (like CSS, for example ;)) 3409 $stuff_to_parse = preg_replace_callback( 3410 "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php|aspx?))$disallowed_after_local/$modifiers", 3411 array($this, 'handle_keyword_replace'), 3412 $stuff_to_parse 3413 ); 3414 } 3415 } 3416 } 3417 3418 // Regular expressions 3419 foreach ($this->language_data['REGEXPS'] as $key => $regexp) { 3420 if ($this->lexic_permissions['REGEXPS'][$key]) { 3421 if (is_array($regexp)) { 3422 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) { 3423 // produce valid HTML when we match multiple lines 3424 $this->_hmr_replace = $regexp[GESHI_REPLACE]; 3425 $this->_hmr_before = 
$regexp[GESHI_BEFORE]; 3426 $this->_hmr_key = $key; 3427 $this->_hmr_after = $regexp[GESHI_AFTER]; 3428 $stuff_to_parse = preg_replace_callback( 3429 "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}", 3430 array($this, 'handle_multiline_regexps'), 3431 $stuff_to_parse); 3432 $this->_hmr_replace = false; 3433 $this->_hmr_before = ''; 3434 $this->_hmr_after = ''; 3435 } else { 3436 $stuff_to_parse = preg_replace( 3437 '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS], 3438 $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER], 3439 $stuff_to_parse); 3440 } 3441 } else { 3442 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) { 3443 // produce valid HTML when we match multiple lines 3444 $this->_hmr_key = $key; 3445 $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/", 3446 array($this, 'handle_multiline_regexps'), $stuff_to_parse); 3447 $this->_hmr_key = ''; 3448 } else { 3449 $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse); 3450 } 3451 } 3452 } 3453 } 3454 3455 // Highlight numbers. As of 1.0.8 we support different types of numbers 3456 $numbers_found = false; 3457 3458 if ($this->lexic_permissions['NUMBERS'] && preg_match($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'], $stuff_to_parse )) { 3459 $numbers_found = true; 3460 3461 //For each of the formats ... 3462 foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) { 3463 //Check if it should be highlighted ... 3464 $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse); 3465 } 3466 } 3467 3468 // 3469 // Now that's all done, replace /[number]/ with the correct styles 3470 // 3471 foreach (array_keys($this->language_data['KEYWORDS']) as $k) { 3472 if (!$this->use_classes) { 3473 $attributes = ' style="' . 3474 (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ? 3475 $this->language_data['STYLES']['KEYWORDS'][$k] : "") . 
'"'; 3476 } else { 3477 $attributes = ' class="kw' . $k . '"'; 3478 } 3479 $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse); 3480 } 3481 3482 if ($numbers_found) { 3483 // Put number styles in 3484 foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) { 3485 //Commented out for now, as this needs some review ... 3486 // if ($numbers_permissions & $id) { 3487 //Get the appropriate style ... 3488 //Checking for unset styles is done by the style cache builder ... 3489 if (!$this->use_classes) { 3490 $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"'; 3491 } else { 3492 $attributes = ' class="nu'.$id.'"'; 3493 } 3494 3495 //Set in the correct styles ... 3496 $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse); 3497 // } 3498 } 3499 } 3500 3501 // Highlight methods and fields in objects 3502 if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) { 3503 $oolang_spaces = "[\s]*"; 3504 $oolang_before = ""; 3505 $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*"; 3506 if (isset($this->language_data['PARSER_CONTROL'])) { 3507 if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) { 3508 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) { 3509 $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE']; 3510 } 3511 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) { 3512 $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER']; 3513 } 3514 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) { 3515 $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES']; 3516 } 3517 } 3518 } 3519 3520 foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) { 3521 if (false !== strpos($stuff_to_parse, $splitter)) { 3522 if (!$this->use_classes) { 3523 $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . 
'"'; 3524 } else { 3525 $attributes = ' class="me' . $key . '"'; 3526 } 3527 $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse); 3528 } 3529 } 3530 } 3531 3532 // 3533 // Highlight brackets. Yes, I've tried adding a semi-colon to this list. 3534 // You try it, and see what happens ;) 3535 // TODO: Fix lexic permissions not converting entities if shouldn't 3536 // be highlighting regardless 3537 // 3538 if ($this->lexic_permissions['BRACKETS']) { 3539 $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'], 3540 $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse ); 3541 } 3542 3543 3544 //FIX for symbol highlighting ... 3545 if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) { 3546 //Get all matches and throw away those witin a block that is already highlighted... (i.e. matched by a regexp) 3547 $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . 
")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER); 3548 $global_offset = 0; 3549 for ($s_id = 0; $s_id < $n_symbols; ++$s_id) { 3550 $symbol_match = $pot_symbols[$s_id][0][0]; 3551 if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) { 3552 // already highlighted blocks _must_ include either < or > 3553 // so if this conditional applies, we have to skip this match 3554 // BenBE: UNLESS the block contains <SEMI> or <PIPE> 3555 if(strpos($symbol_match, '<SEMI>') === false && 3556 strpos($symbol_match, '<PIPE>') === false) { 3557 continue; 3558 } 3559 } 3560 3561 // if we reach this point, we have a valid match which needs to be highlighted 3562 3563 $symbol_length = strlen($symbol_match); 3564 $symbol_offset = $pot_symbols[$s_id][0][1]; 3565 unset($pot_symbols[$s_id]); 3566 $symbol_hl = ""; 3567 3568 // if we have multiple styles, we have to handle them properly 3569 if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) { 3570 $old_sym = -1; 3571 // Split the current stuff to replace into its atomic symbols ... 3572 preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER); 3573 foreach ($sym_match_syms[0] as $sym_ms) { 3574 //Check if consequtive symbols belong to the same group to save output ... 3575 if (isset($this->language_data['SYMBOL_DATA'][$sym_ms]) 3576 && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) { 3577 if (-1 != $old_sym) { 3578 $symbol_hl .= "|>"; 3579 } 3580 $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms]; 3581 if (!$this->use_classes) { 3582 $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">'; 3583 } else { 3584 $symbol_hl .= '<| class="sy' . $old_sym . '">'; 3585 } 3586 } 3587 $symbol_hl .= $sym_ms; 3588 } 3589 unset($sym_match_syms); 3590 3591 //Close remaining tags and insert the replacement at the right position ... 
3592 //Take caution if symbol_hl is empty to avoid doubled closing spans. 3593 if (-1 != $old_sym) { 3594 $symbol_hl .= "|>"; 3595 } 3596 } else { 3597 if (!$this->use_classes) { 3598 $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">'; 3599 } else { 3600 $symbol_hl = '<| class="sy0">'; 3601 } 3602 $symbol_hl .= $symbol_match . '|>'; 3603 } 3604 3605 $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length); 3606 3607 // since we replace old text with something of different size, 3608 // we'll have to keep track of the differences 3609 $global_offset += strlen($symbol_hl) - $symbol_length; 3610 } 3611 } 3612 //FIX for symbol highlighting ... 3613 3614 // Add class/style for regexps 3615 foreach (array_keys($this->language_data['REGEXPS']) as $key) { 3616 if ($this->lexic_permissions['REGEXPS'][$key]) { 3617 if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) { 3618 $this->_rx_key = $key; 3619 $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U", 3620 array($this, 'handle_regexps_callback'), 3621 $stuff_to_parse); 3622 } else { 3623 if (!$this->use_classes) { 3624 $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"'; 3625 } else { 3626 if (is_array($this->language_data['REGEXPS'][$key]) && 3627 array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) { 3628 $attributes = ' class="' . 3629 $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"'; 3630 } else { 3631 $attributes = ' class="re' . $key . '"'; 3632 } 3633 } 3634 $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse); 3635 } 3636 } 3637 } 3638 3639 // Replace <DOT> with . for urls 3640 $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse); 3641 // Replace <|UR1| with <a href= for urls also 3642 if (isset($this->link_styles[GESHI_LINK])) { 3643 if ($this->use_classes) { 3644 $stuff_to_parse = str_replace('<|UR1|', '<a' . 
$this->link_target . ' href=', $stuff_to_parse); 3645 } else { 3646 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse); 3647 } 3648 } else { 3649 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse); 3650 } 3651 3652 // 3653 // NOW we add the span thingy ;) 3654 // 3655 3656 $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse); 3657 $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse ); 3658 return substr($stuff_to_parse, 1); 3659 } 3660 3661 /** 3662 * Sets the time taken to parse the code 3663 * 3664 * @param string $start_time The time when parsing started as returned by @see microtime() 3665 * @param string $end_time The time when parsing ended as returned by @see microtime() 3666 * @since 1.0.2 3667 */ 3668 protected function set_time($start_time, $end_time) { 3669 $start = explode(' ', $start_time); 3670 $end = explode(' ', $end_time); 3671 $this->time = $end[0] + $end[1] - $start[0] - $start[1]; 3672 } 3673 3674 /** 3675 * Gets the time taken to parse the code 3676 * 3677 * @return double The time taken to parse the code 3678 * @since 1.0.2 3679 */ 3680 public function get_time() { 3681 return $this->time; 3682 } 3683 3684 /** 3685 * Merges arrays recursively, overwriting values of the first array with values of later arrays 3686 * 3687 * @since 1.0.8 3688 */ 3689 protected function merge_arrays() { 3690 $arrays = func_get_args(); 3691 $narrays = count($arrays); 3692 3693 // check arguments 3694 // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array) 3695 for ($i = 0; $i < $narrays; $i ++) { 3696 if (!is_array($arrays[$i])) { 3697 // also array_merge_recursive returns nothing in this case 3698 trigger_error('Argument #' . ($i+1) . ' is not an array - trying to merge array with scalar! 
Returning false!', E_USER_WARNING); 3699 return false; 3700 } 3701 } 3702 3703 // the first array is in the output set in every case 3704 $ret = $arrays[0]; 3705 3706 // merege $ret with the remaining arrays 3707 for ($i = 1; $i < $narrays; $i ++) { 3708 foreach ($arrays[$i] as $key => $value) { 3709 if (is_array($value) && isset($ret[$key])) { 3710 // if $ret[$key] is not an array you try to merge an scalar value with an array - the result is not defined (incompatible arrays) 3711 // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false. 3712 $ret[$key] = $this->merge_arrays($ret[$key], $value); 3713 } else { 3714 $ret[$key] = $value; 3715 } 3716 } 3717 } 3718 3719 return $ret; 3720 } 3721 3722 /** 3723 * Gets language information and stores it for later use 3724 * 3725 * @param string $file_name The filename of the language file you want to load 3726 * @since 1.0.0 3727 * @todo Needs to load keys for lexic permissions for keywords, regexps etc 3728 */ 3729 protected function load_language($file_name) { 3730 if ($file_name == $this->loaded_language) { 3731 // this file is already loaded! 
3732 return; 3733 } 3734 3735 //Prepare some stuff before actually loading the language file 3736 $this->loaded_language = $file_name; 3737 $this->parse_cache_built = false; 3738 $this->enable_highlighting(); 3739 $language_data = array(); 3740 3741 //Load the language file 3742 require $file_name; 3743 3744 // Perhaps some checking might be added here later to check that 3745 // $language data is a valid thing but maybe not 3746 $this->language_data = $language_data; 3747 3748 // Set strict mode if should be set 3749 $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES']; 3750 3751 // Set permissions for all lexics to true 3752 // so they'll be highlighted by default 3753 foreach (array_keys($this->language_data['KEYWORDS']) as $key) { 3754 if (!empty($this->language_data['KEYWORDS'][$key])) { 3755 $this->lexic_permissions['KEYWORDS'][$key] = true; 3756 } else { 3757 $this->lexic_permissions['KEYWORDS'][$key] = false; 3758 } 3759 } 3760 3761 foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) { 3762 $this->lexic_permissions['COMMENTS'][$key] = true; 3763 } 3764 foreach (array_keys($this->language_data['REGEXPS']) as $key) { 3765 $this->lexic_permissions['REGEXPS'][$key] = true; 3766 } 3767 3768 // for BenBE and future code reviews: 3769 // we can use empty here since we only check for existance and emptiness of an array 3770 // if it is not an array at all but rather false or null this will work as intended as well 3771 // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice 3772 if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) { 3773 foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) { 3774 // it's either true or false and maybe is true as well 3775 $perm = $value !== GESHI_NEVER; 3776 if ($flag == 'ALL') { 3777 $this->enable_highlighting($perm); 3778 continue; 3779 } 3780 if (!isset($this->lexic_permissions[$flag])) { 3781 // unknown lexic permission 
/**
 * Takes the parsed code and various options, and creates the HTML
 * surrounding it to make it look nice.
 *
 * The parsed code is split on "\n" and rebuilt in place as
 * header() + per-line markup + footer(). Two layouts exist:
 *
 *  - line numbers enabled and the header type is not GESHI_HEADER_PRE_TABLE:
 *    every line becomes an <li> wrapping a <div> (or a <pre> for
 *    GESHI_HEADER_PRE_VALID);
 *  - otherwise: lines are emitted one after another, optionally inside a
 *    <pre>; for GESHI_HEADER_PRE_TABLE with line numbers a separate table
 *    cell holding the line numbers is emitted first.
 *
 * @param string $parsed_code The code already parsed (reference!). On return
 *                            it holds the complete HTML output.
 * @since 1.0.0
 */
protected function finalise(&$parsed_code) {
    // Remove end-of-important markers when no start marker is present.
    // Upstream flags this handling as buggy and scheduled for removal.
    if ($this->enable_important_blocks &&
        (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
        $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
    }

    // Add HTML whitespace stuff if we're not inside a <pre>
    if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
        $this->indent($parsed_code);
    }

    // Purge spans that contain nothing but whitespace
    /** NOTE: memorypeak #1 */
    $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);

    // If we are using IDs for line numbers, there needs to be an overall
    // ID set to prevent collisions.
    if ($this->add_ids && !$this->overall_id) {
        $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
    }

    // Get code into lines
    /** NOTE: memorypeak #2 */
    $code = explode("\n", $parsed_code);
    $parsed_code = $this->header();

    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
        // List layout: one <li> per line.
        // If we're using the <pre> header, we shouldn't add newlines because
        // the <pre> will line-break them (and the <li>s already do this for us)
        $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';

        // $i is advanced inside the body: it is the 0-based index up to the
        // "++$i" below and the 1-based line number after it.
        for ($i = 0, $n = count($code); $i < $n;) {
            // Reset the attributes for a new line ...
            $attrs = array();

            // Empty lines need visible content or the row collapses; ordinary
            // whitespace is collapsed by the browser, so use the entity.
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }

            // If this is a "special line"...
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                if ($this->use_classes) {
                    $attrs['class'][] = 'li2';
                    $def_attr = ' class="de2"';
                } else {
                    $attrs['style'][] = $this->line_style2;
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            } else {
                if ($this->use_classes) {
                    $attrs['class'][] = 'li1';
                    $def_attr = ' class="de1"';
                } else {
                    $attrs['style'][] = $this->line_style1;
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            }

            // Check which type of tag to insert for this line
            if ($this->header_type == GESHI_HEADER_PRE_VALID) {
                $start = "<pre$def_attr>";
                $end = '</pre>';
            } else {
                $start = "<div$def_attr>";
                $end = '</div>';
            }

            // From here on $i is the 1-based line number
            ++$i;

            // Are we supposed to use ids? If so, add them
            if ($this->add_ids) {
                $attrs['id'][] = "$this->overall_id-$i";
            }

            // Is this some line with extra styles?
            if (in_array($i, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$i])) {
                        $attrs['class'][] = "lx$i";
                    } else {
                        $attrs['class'][] = "ln-xtra";
                    }
                } else {
                    array_push($attrs['style'], $this->get_line_style($i));
                }
            }

            // Add in the line surrounded by appropriate list HTML
            $attr_string = '';
            foreach ($attrs as $key => $attr) {
                $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
            }

            $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
            // Free each line as soon as it has been emitted
            unset($code[$i - 1]);
        }
    } else {
        $n = count($code);
        if ($this->use_classes) {
            $attributes = ' class="de1"';
        } else {
            $attributes = ' style="'. $this->code_style .'"';
        }
        if ($this->header_type == GESHI_HEADER_PRE_VALID) {
            $parsed_code .= '<pre'. $attributes .'>';
        } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                if ($this->use_classes) {
                    $ln_attr = ' class="ln"';
                } else {
                    $ln_attr = ' style="'. $this->table_linenumber_style .'"';
                }
                $parsed_code .= '<td' . $ln_attr . '><pre' . $attributes . '>';
                // Line-number column. Kept separate from the code loop below
                // (the original notes this is better for memory consumption).
                for ($i = 0; $i < $n; ++$i) {
                    list($open, $close) = $this->_finalise_line_spans($i);
                    $parsed_code .= $open . ($this->line_numbers_start + $i);
                    if ($close) {
                        // Wrapped rows are display:block, so no newline is added
                        $parsed_code .= str_repeat('</span>', $close);
                    } else {
                        $parsed_code .= "\n";
                    }
                }
                $parsed_code .= '</pre></td><td'.$attributes.'>';
            }
            $parsed_code .= '<pre'. $attributes .'>';
        }

        // Code lines. No <li>s here, but fancy / extra-highlighted lines
        // still get block-level spans so the full width is highlighted.
        for ($i = 0; $i < $n; ++$i) {
            // See the list branch: empty lines need the entity to stay visible
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }

            list($open, $close) = $this->_finalise_line_spans($i);
            $parsed_code .= $open . $code[$i];

            if ($close) {
                $parsed_code .= str_repeat('</span>', $close);
            }
            if ($i + 1 < $n) {
                $parsed_code .= "\n";
            }
            unset($code[$i]);
        }

        if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
            $parsed_code .= '</pre>';
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $parsed_code .= '</td>';
        }
    }

    $parsed_code .= $this->footer();
}

/**
 * Builds the opening <span> wrappers for one row of the non-list layouts.
 *
 * Helper for finalise(). Extra-line styles are looked up by the 1-based
 * line number ($i + 1), the same number used for the in_array() test and
 * by the list layout.
 *
 * @param int $i 0-based index of the row
 * @return array array(string $open_markup, int $spans_opened)
 */
protected function _finalise_line_spans($i) {
    $open = '';
    $close = 0;

    // fancy lines
    if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
        $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
        if ($this->use_classes) {
            $open .= '<span class="xtra li2"><span class="de2">';
        } else {
            // This style "covers up" the special styles set for special lines
            // so that styles applied to special lines don't apply to the actual
            // code on that line
            $open .= '<span style="display:block;' . $this->line_style2 . '">'
                . '<span style="' . $this->code_style . '">';
        }
        $close += 2;
    }

    // Is this some line with extra styles?
    $line = $i + 1;
    if (in_array($line, $this->highlight_extra_lines)) {
        if ($this->use_classes) {
            if (isset($this->highlight_extra_lines_styles[$line])) {
                $open .= "<span class=\"xtra lx$line\">";
            } else {
                $open .= "<span class=\"xtra ln-xtra\">";
            }
        } else {
            $open .= "<span style=\"display:block;" . $this->get_line_style($line) . "\">";
        }
        ++$close;
    }

    return array($open, $close);
}
/**
 * Builds the opening markup of the code block: the container tag with
 * its attributes, the optional header content and, when line numbers
 * are enabled, the opening <ol> (or the table body for
 * GESHI_HEADER_PRE_TABLE).
 *
 * The class attribute is always emitted; the style attribute only when
 * classes are not in use.
 *
 * NOTE(review): the id attribute is written as-is, while get_stylesheet()
 * passes the id through _genCSSName() for its selector; ids that start
 * with a digit would not match - confirm whether that is intended.
 *
 * @return string The header for the code block (nothing is returned when
 *                line numbers are on and the header type is not one of
 *                the known constants)
 * @since 1.0.0
 */
protected function header() {
    $type = $this->header_type;
    $numbered = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);

    // Attributes of the outer container
    $class_names = $this->_genCSSName($this->language);
    if ($this->overall_class != '') {
        $class_names .= ' ' . $this->_genCSSName($this->overall_class);
    }
    $container_attrs = ' class="' . $class_names . '"';
    if ($this->overall_id != '') {
        $container_attrs .= ' id="' . $this->overall_id . '"';
    }
    if ($this->overall_style != '' && !$this->use_classes) {
        $container_attrs .= ' style="' . $this->overall_style . '"';
    }

    // Only a non-default start number needs to be spelled out on the <ol>
    $list_attrs = ($this->line_numbers_start != 1)
        ? ' start="' . $this->line_numbers_start . '"'
        : '';

    // Header content, wrapped in a table head or a <div>
    $head = $this->header_content;
    if ($head) {
        if ($type == GESHI_HEADER_PRE || $type == GESHI_HEADER_PRE_VALID) {
            $head = str_replace("\n", '', $head);
        }
        $head = $this->replace_keywords($head);

        $head_attr = $this->use_classes
            ? ' class="head"'
            : ' style="' . $this->header_content_style . '"';

        if ($type == GESHI_HEADER_PRE_TABLE && $numbered) {
            $head = '<thead><tr><td colspan="2" ' . $head_attr . '>' . $head . '</td></tr></thead>';
        } else {
            $head = '<div' . $head_attr . '>' . $head . '</div>';
        }
    }

    // No container at all: the <ol> carries the container attributes
    if (GESHI_HEADER_NONE == $type) {
        if ($numbered) {
            return $head . '<ol' . $container_attrs . $list_attrs . '>';
        }
        return $head . ($this->force_code_block ? '<div>' : '');
    }

    // Without line numbers everything except <pre> uses a <div> container
    if (!$numbered) {
        $tag = ($type == GESHI_HEADER_PRE) ? 'pre' : 'div';
        return '<' . $tag . $container_attrs . '>' . $head .
            ($this->force_code_block ? '<div>' : '');
    }

    if ($type == GESHI_HEADER_PRE) {
        return '<pre' . $container_attrs . '>' . $head . '<ol' . $list_attrs . '>';
    }
    if ($type == GESHI_HEADER_DIV || $type == GESHI_HEADER_PRE_VALID) {
        return '<div' . $container_attrs . '>' . $head . '<ol' . $list_attrs . '>';
    }
    if ($type == GESHI_HEADER_PRE_TABLE) {
        return '<table' . $container_attrs . '>' . $head . '<tbody><tr class="li1">';
    }
    // Unknown header type with line numbers: fall off the end, as before
}
/**
 * Returns the footer for the code block.
 *
 * Produces the optional footer content followed by the closing tags that
 * match what header() opened for the current header type and line-number
 * setting.
 *
 * The footer content is wrapped in a <tfoot> cell for
 * GESHI_HEADER_PRE_TABLE with line numbers, otherwise in a <div>. Both
 * wrappers carry the "foot" class (when classes are used) or the inline
 * footer style, so the ".foot" rule from get_stylesheet() applies in the
 * table layout as well.
 *
 * @return string The footer for the code block
 * @since 1.0.0
 */
protected function footer() {
    $numbered = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);

    $footer = $this->footer_content;
    if ($footer) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            $footer = str_replace("\n", '', $footer);
        }
        $footer = $this->replace_keywords($footer);

        if ($this->use_classes) {
            $attr = ' class="foot"';
        } else {
            $attr = " style=\"{$this->footer_content_style}\"";
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $numbered) {
            // $attr starts with a space, so it can follow colspan directly
            $footer = "<tfoot><tr><td colspan=\"2\"$attr>$footer</td></tr></tfoot>";
        } else {
            $footer = "<div$attr>$footer</div>";
        }
    }

    if (GESHI_HEADER_NONE == $this->header_type) {
        return $numbered ? '</ol>' . $footer : $footer;
    }

    if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
        if ($numbered) {
            return "</ol>$footer</div>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
        if ($numbered) {
            return "</tr></tbody>$footer</table>";
        }
        // Without line numbers header() opened a plain <div> container
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    } else {
        if ($numbered) {
            return "</ol>$footer</pre>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</pre>";
    }
}
/**
 * Substitutes the placeholders allowed in header and footer content.
 *
 * Each placeholder may be written in angle brackets or in braces:
 * TIME (result of get_time(), three decimals), LANGUAGE (the language's
 * LANG_NAME), VERSION (GESHI_VERSION) and SPEED (source length divided
 * by the time, shown in B/s or KB/s, or "N/A" when the time is not
 * positive).
 *
 * @param string $instr The header or footer content to do replacement on
 * @return string The header or footer with replaced keywords
 * @since 1.0.2
 */
protected function replace_keywords($instr) {
    $time = $this->get_time();

    if ($time <= 0) {
        $speed = 'N/A';
    } else {
        $bytes_per_second = strlen($this->source) / $time;
        $speed = ($bytes_per_second >= 1024)
            ? sprintf("%.2f KB/s", $bytes_per_second / 1024.0)
            : sprintf("%.0f B/s", $bytes_per_second);
    }

    // Both spellings of a placeholder, then its value. The order is kept
    // because str_replace() applies the pairs one after another.
    $placeholders = array(
        array('<TIME>', '{TIME}', number_format($time, 3)),
        array('<LANGUAGE>', '{LANGUAGE}', $this->language_data['LANG_NAME']),
        array('<VERSION>', '{VERSION}', GESHI_VERSION),
        array('<SPEED>', '{SPEED}', $speed),
    );

    $search = array();
    $replace = array();
    foreach ($placeholders as $entry) {
        list($angle_form, $brace_form, $value) = $entry;
        $search[] = $angle_form;
        $replace[] = $value;
        $search[] = $brace_form;
        $replace[] = $value;
    }

    return str_replace($search, $replace, $instr);
}
/**
 * Secure replacement for PHP built-in function htmlspecialchars().
 *
 * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
 * for this replacement function.
 *
 * The INTERFACE for this function is almost the same as that for
 * htmlspecialchars(), with the same default for quote style; however, there
 * is no 'charset' parameter. The reason for this is as follows:
 *
 * The PHP docs say:
 *      "The third argument charset defines character set used in conversion."
 *
 * I suspect PHP's htmlspecialchars() is working at the byte-value level and
 * thus _needs_ to know (or assume) a character set because the special
 * characters to be replaced could exist at different code points in
 * different character sets. (If indeed htmlspecialchars() works at
 * byte-value level that goes some way towards explaining why the
 * vulnerability would exist in this function, too, and not only in
 * htmlentities() which certainly is working at byte-value level.)
 *
 * This replacement function however works at character level and should
 * therefore be "immune" to character set differences - so no charset
 * parameter is needed or provided. If a third parameter is passed, it will
 * be silently ignored.
 *
 * In the OUTPUT there is a minor difference in that we use '&#39;' instead
 * of PHP's '&#039;' for a single quote: this provides compatibility with
 *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
 * (see comment by mikiwoz at yahoo dot co dot uk on
 * http://php.net/htmlspecialchars); it also matches the entity definition
 * for XML 1.0
 * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
 * Like PHP we use a numeric character reference instead of '&apos;' for the
 * single quote. For the other special characters we use the named entity
 * references, as PHP is doing.
 *
 * In addition ';' and '|' are replaced by the placeholders '<SEMI>' and
 * '<PIPE>' (see the comments in the table below).
 *
 * The quote style only affects the current call: the shared base table is
 * copied before it is adjusted, so an ENT_NOQUOTES or ENT_QUOTES call does
 * not change the result of later calls.
 *
 * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
 *
 * @license     http://www.gnu.org/copyleft/lgpl.html
 *              GNU Lesser General Public License
 * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
 *              Wikka Development Team}
 *
 * @param   string  $string string to be converted
 * @param   integer $quote_style
 *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
 *                      - ENT_NOQUOTES: escapes only &, < and >
 *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
 * @return  string  converted string
 * @since   1.0.7.18
 */
protected function hsc($string, $quote_style = ENT_COMPAT) {
    // Base table for ENT_COMPAT. It is static so it is built only once and
    // must therefore never be modified - work on a copy instead.
    static $aTransSpecchar = array(
        '&' => '&amp;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',

        //This fix is related to SF#1923020, but has to be applied
        //regardless of actually highlighting symbols.

        //Circumvent a bug with symbol highlighting
        //This is required as ; would produce undesirable side-effects if it
        //was not to be processed as an entity.
        ';' => '<SEMI>', // Force ; to be processed as entity
        '|' => '<PIPE>' // Force | to be processed as entity
    );

    // Per-call copy (arrays are copied by value on assignment)
    $translation = $aTransSpecchar;

    switch ($quote_style) {
        case ENT_NOQUOTES: // don't convert double quotes
            unset($translation['"']);
            break;
        case ENT_QUOTES: // convert single quotes as well
            $translation["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
            break;
    }

    // strtr() with an array does not rescan replaced text, so the ';' in
    // '&amp;' or the '<' in '<SEMI>' is not translated a second time
    return strtr($string, $translation);
}

/**
 * Generate a CSS class name from a given string.
 * Prevents invalid CSS classes: a name starting with a digit gets an
 * underscore prepended. An empty name is returned as an empty string.
 *
 * @param string $name Proposed class name
 *
 * @return string Safe CSS class name
 */
protected function _genCSSName($name) {
    $name = (string) $name;
    if ($name === '') {
        // Nothing to inspect; avoids reading offset 0 of an empty string
        return '';
    }
    return (is_numeric($name[0]) ? '_' : '') . $name;
}
/**
 * Returns a stylesheet for the highlighted code. If $economy mode
 * is true, we only return the stylesheet declarations that matter for
 * this code block instead of the whole thing.
 *
 * Every rule is prefixed with a scope selector: the overall id when one
 * is set, otherwise the language class (plus the overall class, if any).
 * Rules with an empty style are never written.
 *
 * NOTE(review): the id selector is passed through _genCSSName(), whereas
 * header() writes the id attribute unchanged - confirm ids starting with
 * a digit are not expected.
 *
 * @param boolean $economy_mode Whether to use economy mode or not
 * @return string A stylesheet built on the data for the current language
 *                (an empty string when an error is set)
 * @since 1.0.0
 */
public function get_stylesheet($economy_mode = true) {
    // If there's an error, chances are that the language file
    // won't have populated the language data, so we can't
    // risk getting a stylesheet...
    if ($this->error) {
        return '';
    }

    // Make sure the style rearrangements have been processed
    if (!isset($this->language_data['NUMBERS_CACHE'])) {
        $this->build_style_cache();
    }

    // Scope selector, followed by a blank so descendants can be appended
    if ($this->overall_id) {
        $scope = '#' . $this->_genCSSName($this->overall_id);
    } else {
        $scope = '.' . $this->_genCSSName($this->language);
        if ($this->overall_class) {
            $scope .= '.' . $this->_genCSSName($this->overall_class);
        }
    }
    $scope .= ' ';

    // Banner comment: short credit in economy mode, full header otherwise
    $credit = array(
        ' * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2014 Benny Baumann',
        ' * (http://qbnz.com/highlighter/ and http://geshi.org/)',
    );
    if ($economy_mode) {
        $banner = array_merge(array('/**'), $credit, array(' */'));
    } else {
        $banner = array_merge(
            array(
                '/**',
                ' * GeSHi Dynamically Generated Stylesheet',
                ' * --------------------------------------',
                ' * Dynamically generated stylesheet for ' . $this->language,
                ' * CSS class: ' . $this->overall_class . ', CSS id: ' . $this->overall_id,
            ),
            $credit,
            array(
                ' * --------------------------------------',
                ' */',
            )
        );
    }
    $css = implode("\n", $banner) . "\n";

    // Line-related rules are wanted in full mode or when lines are numbered
    $wants_line_rules = !$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS;

    // Default style of the code inside a line
    if ($wants_line_rules) {
        $css .= $scope . '.de1, ' . $scope . '.de2 {' . $this->code_style . "}\n";
    }

    // Overall style
    if ($this->overall_style != '') {
        $css .= $scope . ' {' . $this->overall_style . "}\n";
    }

    // Link states
    foreach ($this->link_styles as $state => $rule) {
        if ($rule == '') {
            continue;
        }
        switch ($state) {
            case GESHI_LINK:
                $pseudo = 'link';
                break;
            case GESHI_HOVER:
                $pseudo = 'hover';
                break;
            case GESHI_ACTIVE:
                $pseudo = 'active';
                break;
            case GESHI_VISITED:
                $pseudo = 'visited';
                break;
            default:
                $pseudo = null;
        }
        if ($pseudo !== null) {
            $css .= $scope . 'a:' . $pseudo . ' {' . $rule . "}\n";
        }
    }

    // Header and footer
    if ($this->header_content_style != '') {
        $css .= $scope . '.head {' . $this->header_content_style . "}\n";
    }
    if ($this->footer_content_style != '') {
        $css .= $scope . '.foot {' . $this->footer_content_style . "}\n";
    }

    // Important blocks
    if ($this->important_styles != '') {
        $css .= $scope . '.imp {' . $this->important_styles . "}\n";
    }

    // Line number styles
    if ($wants_line_rules && $this->line_style1 != '') {
        $css .= $scope . 'li, ' . $scope . '.li1 {' . $this->line_style1 . "}\n";
    }
    if ($wants_line_rules && $this->table_linenumber_style != '') {
        $css .= $scope . '.ln {' . $this->table_linenumber_style . "}\n";
    }
    if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
        $css .= $scope . '.li2 {' . $this->line_style2 . "}\n";
    }

    // Keyword groups: in economy mode only groups that are enabled
    foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if (!$economy_mode || !empty($this->lexic_permissions['KEYWORDS'][$group])) {
            $css .= $scope . '.kw' . $group . ' {' . $styles . "}\n";
        }
    }

    // Comment groups: enabled ones, plus those backed by a comment regexp
    foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        $wanted = !$economy_mode
            || !empty($this->lexic_permissions['COMMENTS'][$group])
            || (!empty($this->language_data['COMMENT_REGEXP']) &&
                !empty($this->language_data['COMMENT_REGEXP'][$group]));
        if ($wanted) {
            $css .= $scope . '.co' . $group . ' {' . $styles . "}\n";
        }
    }

    // Lexics governed by a single permission flag, in output order.
    // STYLES key => array(class prefix, whether group 'HARD' becomes '_h')
    $flag_lexics = array(
        'ESCAPE_CHAR' => array('es', true),
        'BRACKETS'    => array('br', false),
        'SYMBOLS'     => array('sy', false),
        'STRINGS'     => array('st', true),
        'NUMBERS'     => array('nu', false),
        'METHODS'     => array('me', false),
    );
    foreach ($flag_lexics as $lexic => $spec) {
        list($prefix, $maps_hard) = $spec;
        foreach ($this->language_data['STYLES'][$lexic] as $group => $styles) {
            if ($styles == '' || ($economy_mode && !$this->lexic_permissions[$lexic])) {
                continue;
            }
            // since 1.0.8 hard escapes / hard quotes use the "_h" suffix
            if ($maps_hard && $group === 'HARD') {
                $group = '_h';
            }
            $css .= $scope . '.' . $prefix . $group . ' {' . $styles . "}\n";
        }
    }

    // Script delimiters: economy mode is not considered here
    foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
        if ($styles != '') {
            $css .= $scope . '.sc' . $group . ' {' . $styles . "}\n";
        }
    }

    // Regexps: a regexp may name its own class via GESHI_CLASS
    foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if ($economy_mode && empty($this->lexic_permissions['REGEXPS'][$group])) {
            continue;
        }
        $regexp = $this->language_data['REGEXPS'][$group];
        if (is_array($regexp) && array_key_exists(GESHI_CLASS, $regexp)) {
            $class = $regexp[GESHI_CLASS];
        } else {
            $class = 're' . $group;
        }
        $css .= $scope . '.' . $class . ' {' . $styles . "}\n";
    }

    // Lines highlighted extra: the shared rule is skipped in economy mode
    // when every extra line has a style of its own
    if (!$economy_mode ||
        count($this->highlight_extra_lines) != count($this->highlight_extra_lines_styles)) {
        $css .= $scope . '.ln-xtra, ' . $scope . 'li.ln-xtra, ' . $scope . 'div.ln-xtra {'
            . $this->highlight_extra_lines_style . "}\n";
    }
    $css .= $scope . "span.xtra { display:block; }\n";
    foreach ($this->highlight_extra_lines_styles as $line_id => $line_style) {
        $css .= $scope . '.lx' . $line_id . ', ' . $scope . 'li.lx' . $line_id . ', '
            . $scope . 'div.lx' . $line_id . ' {' . $line_style . "}\n";
    }

    return $css;
}
/**
 * Gets the style that is used for the specified line.
 *
 * A style registered for that line in highlight_extra_lines_styles wins;
 * otherwise the general highlight_extra_lines_style is returned.
 *
 * @param int $line The line number information is requested for
 * @return string The style for that line
 * @since 1.0.7.21
 */
protected function get_line_style($line) {
    if (isset($this->highlight_extra_lines_styles[$line])) {
        return $this->highlight_extra_lines_styles[$line];
    }

    // no "extra" style assigned to this line
    return $this->highlight_extra_lines_style;
}
/**
 * Creates an optimized regular expression list from an array of strings.
 *
 * The strings are sorted and quoted with preg_quote(), then merged into a
 * nested token array in which entries that share a leading part hang below
 * a common key. That array is turned into pattern text by
 * _optimize_regexp_list_tokens_to_string().
 *
 * Example:
 * <code>$list = array('faa', 'foo', 'foobar');
 * => roughly 'f(?:aa|oo(?:bar)?)' (groups are non-capturing)</code>
 *
 * The result is an ARRAY of pattern strings, not a single string: a new
 * element is started when the collected length exceeds
 * GESHI_MAX_PCRE_LENGTH or when adding a piece would exceed
 * GESHI_MAX_PCRE_SUBPATTERNS non-capturing groups (if that limit is set).
 *
 * @param array  $list array of (unquoted) strings
 * @param string $regexp_delimiter your regular expression delimiter, @see preg_quote()
 * @return array list of regular expression strings (alternations without delimiters)
 * @author Milian Wolff <mail@milianw.de>
 * @since 1.0.8
 */
protected function optimize_regexp_list($list, $regexp_delimiter = '/') {
    // characters a key must not start with after a split (see below)
    $regex_chars = array('.', '\\', '+', '-', '*', '?', '[', '^', ']', '$',
        '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
    // sorting puts entries with a common beginning next to each other
    sort($list);
    $regexp_list = array('');
    $num_subpatterns = 0;
    $list_key = 0;

    // the tokens which we will use to generate the regexp list
    $tokens = array();
    // keys of the previously inserted entry, one per nesting level
    $prev_keys = array();
    // go through all entries of the list and generate the token list
    $cur_len = 0;
    for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
        if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
            // seems like the length of this pcre is growing exorbitantly:
            // flush the tokens collected so far into a new list element
            $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
            $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
            $tokens = array();
            $cur_len = 0;
        }
        $level = 0;
        $entry = preg_quote((string) $list[$i], $regexp_delimiter);
        // $pointer walks down the token array by reference
        $pointer = &$tokens;
        // properly assign the new entry to the correct position in the token array
        // possibly generate smaller common denominator keys
        while (true) {
            // get the common denominator
            if (isset($prev_keys[$level])) {
                if ($prev_keys[$level] == $entry) {
                    // this is a duplicate entry, skip it
                    // (continues the outer for loop with the next list entry)
                    continue 2;
                }
                // count the leading characters shared with the previous key
                $char = 0;
                while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                        && $entry[$char] == $prev_keys[$level][$char]) {
                    ++$char;
                }
                if ($char > 0) {
                    // this entry has at least some chars in common with the current key
                    if ($char == strlen($prev_keys[$level])) {
                        // current key is totally matched, i.e. this entry has just some bits appended
                        $pointer = &$pointer[$prev_keys[$level]];
                    } else {
                        // only part of the keys match
                        $new_key_part1 = substr($prev_keys[$level], 0, $char);
                        $new_key_part2 = substr($prev_keys[$level], $char);

                        if (in_array($new_key_part1[0], $regex_chars)
                            || in_array($new_key_part2[0], $regex_chars)) {
                            // this is bad, a regex char as first character:
                            // do not split, store the entry as a key of its own
                            $pointer[$entry] = array('' => true);
                            array_splice($prev_keys, $level, count($prev_keys), $entry);
                            $cur_len += strlen($entry);
                            // re-enters the while loop; the duplicate check
                            // above then moves on to the next list entry
                            continue;
                        } else {
                            // relocate previous tokens
                            $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                            unset($pointer[$prev_keys[$level]]);
                            $pointer = &$pointer[$new_key_part1];
                            // recreate key index
                            array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                            $cur_len += strlen($new_key_part2);
                        }
                    }
                    // descend one level and go on with the unmatched rest
                    ++$level;
                    $entry = substr($entry, $char);
                    continue;
                }
                // else: fall through, i.e. no common denominator was found
            }
            if ($level == 0 && !empty($tokens)) {
                // we can dump current tokens into the string and throw them away afterwards
                $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $new_subpatterns = substr_count($new_entry, '(?:');
                if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                    // too many groups for the current element: start a new one
                    $regexp_list[++$list_key] = $new_entry;
                    $num_subpatterns = $new_subpatterns;
                } else {
                    if (!empty($regexp_list[$list_key])) {
                        $new_entry = '|' . $new_entry;
                    }
                    $regexp_list[$list_key] .= $new_entry;
                    $num_subpatterns += $new_subpatterns;
                }
                $tokens = array();
                $cur_len = 0;
            }
            // no further common denominator found
            // (the '' key marks that the path up to here is itself an entry)
            $pointer[$entry] = array('' => true);
            array_splice($prev_keys, $level, count($prev_keys), $entry);

            $cur_len += strlen($entry);
            break;
        }
        // release the processed entry
        unset($list[$i]);
    }
    // make sure the last tokens get converted as well
    $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
    if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
        if ( !empty($regexp_list[$list_key]) ) {
            ++$list_key;
        }
        $regexp_list[$list_key] = $new_entry;
    } else {
        if (!empty($regexp_list[$list_key])) {
            $new_entry = '|' . $new_entry;
        }
        $regexp_list[$list_key] .= $new_entry;
    }
    return $regexp_list;
}
4685 * 4686 * @param array $tokens array of tokens 4687 * @param bool $recursed to know wether we recursed or not 4688 * @return string 4689 * @author Milian Wolff <mail@milianw.de> 4690 * @since 1.0.8 4691 */ 4692 protected function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) { 4693 $list = ''; 4694 foreach ($tokens as $token => $sub_tokens) { 4695 $list .= $token; 4696 $close_entry = isset($sub_tokens['']); 4697 unset($sub_tokens['']); 4698 if (!empty($sub_tokens)) { 4699 $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')'; 4700 if ($close_entry) { 4701 // make sub_tokens optional 4702 $list .= '?'; 4703 } 4704 } 4705 $list .= '|'; 4706 } 4707 if (!$recursed) { 4708 // do some optimizations 4709 // common trailing strings 4710 // BUGGY! 4711 //$list = preg_replace_callback('#(?<=^|\:|\|)\w+?(\w+)(?:\|.+\1)+(?=\|)#', create_function( 4712 // '$matches', 'return "(?:" . preg_replace("#" . preg_quote($matches[1], "#") . "(?=\||$)#", "", $matches[0]) . ")" . $matches[1];'), $list); 4713 // (?:p)? => p? 4714 $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list); 4715 // (?:a|b|c|d|...)? => [abcd...]? 4716 // TODO: a|bb|c => [ac]|bb 4717 static $callback_2; 4718 if (!isset($callback_2)) { 4719 $callback_2 = function($matches) { 4720 return "[" . str_replace("|", "", $matches[1]) . "]"; 4721 }; 4722 } 4723 $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#', $callback_2, $list); 4724 } 4725 // return $list without trailing pipe 4726 return substr($list, 0, -1); 4727 } 4728} // End Class GeSHi 4729 4730 4731if (!function_exists('geshi_highlight')) { 4732 /** 4733 * Easy way to highlight stuff. Behaves just like highlight_string 4734 * 4735 * @param string $string The code to highlight 4736 * @param string $language The language to highlight the code in 4737 * @param string $path The path to the language files. 
You can leave this blank if you need 4738 * as from version 1.0.7 the path should be automatically detected 4739 * @param boolean $return Whether to return the result or to echo 4740 * @return string The code highlighted (if $return is true) 4741 * @since 1.0.2 4742 */ 4743 function geshi_highlight($string, $language, $path = null, $return = false) { 4744 $geshi = new GeSHi($string, $language, $path); 4745 $geshi->set_header_type(GESHI_HEADER_NONE); 4746 4747 if ($return) { 4748 return '<code>' . $geshi->parse_code() . '</code>'; 4749 } 4750 4751 echo '<code>' . $geshi->parse_code() . '</code>'; 4752 4753 if ($geshi->error()) { 4754 return false; 4755 } 4756 return true; 4757 } 4758} 4759