1 <?php
2 /**
3  * GeSHi - Generic Syntax Highlighter
4  *
5  * The GeSHi class for Generic Syntax Highlighting. Please refer to the
6  * documentation at http://qbnz.com/highlighter/documentation.php for more
7  * information about how to use this class.
8  *
9  * For changes, release notes, TODOs etc, see the relevant files in the docs/
10  * directory.
11  *
12  *   This file is part of GeSHi.
13  *
14  *  GeSHi is free software; you can redistribute it and/or modify
15  *  it under the terms of the GNU General Public License as published by
16  *  the Free Software Foundation; either version 2 of the License, or
17  *  (at your option) any later version.
18  *
19  *  GeSHi is distributed in the hope that it will be useful,
20  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22  *  GNU General Public License for more details.
23  *
24  *  You should have received a copy of the GNU General Public License
25  *  along with GeSHi; if not, write to the Free Software
26  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
27  *
28  * @package    geshi
29  * @subpackage core
30  * @author     Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
31  * @copyright  (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2014 Benny Baumann
32  * @license    http://gnu.org/copyleft/gpl.html GNU GPL
33  */
34 
35 //
36 // GeSHi Constants
37 // You should use these constant names in your programs instead of
38 // their values - you never know when a value may change in a future
39 // version
40 //
41 
42 /** The version of this GeSHi file */
43 define('GESHI_VERSION', '1.0.9.1');
44 
45 // Define the root directory for the GeSHi code tree, unless the including script already did
46 if (!defined('GESHI_ROOT')) {
47     /** The root directory for GeSHi: the directory of this file, with a trailing separator */
48     define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
49 }
50 /** The language file directory for GeSHi (the "geshi" directory below GESHI_ROOT)
51     @access private */
52 define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
53 
54 // Define if GeSHi should be paranoid about security, unless the including script already did
55 if (!defined('GESHI_SECURITY_PARANOID')) {
56     /** Tells GeSHi to be paranoid about security settings */
57     define('GESHI_SECURITY_PARANOID', false);
58 }
59 
60 // Line numbers - use with enable_line_numbers()
61 /** Use no line numbers when building the result */
62 define('GESHI_NO_LINE_NUMBERS', 0);
63 /** Use normal line numbers when building the result */
64 define('GESHI_NORMAL_LINE_NUMBERS', 1);
65 /** Use fancy line numbers when building the result */
66 define('GESHI_FANCY_LINE_NUMBERS', 2);
67 
68 // Container HTML type
69 /** Use nothing to surround the source */
70 define('GESHI_HEADER_NONE', 0);
71 /** Use a "div" to surround the source */
72 define('GESHI_HEADER_DIV', 1);
73 /** Use a "pre" to surround the source */
74 define('GESHI_HEADER_PRE', 2);
75 /** Use a "pre" to wrap lines when line numbers are enabled or to wrap the whole code. */
76 define('GESHI_HEADER_PRE_VALID', 3);
77 /**
78  * Use a "table" to surround the source:
79  *
80  *  <table>
81  *    <thead><tr><td colspan="2">$header</td></tr></thead>
82  *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
83  *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
84  *  </table>
85  *
86  * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
87  * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
88  * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
89  */
90 define('GESHI_HEADER_PRE_TABLE', 4);
91 
92 // Capitalisation constants
93 /** Leave keywords found as the case that they are */
94 define('GESHI_CAPS_NO_CHANGE', 0);
95 /** Uppercase keywords found */
96 define('GESHI_CAPS_UPPER', 1);
97 /** Lowercase keywords found */
98 define('GESHI_CAPS_LOWER', 2);
99 
100 // Link style constants
101 /** Links in the source in the :link state */
102 define('GESHI_LINK', 0);
103 /** Links in the source in the :hover state */
104 define('GESHI_HOVER', 1);
105 /** Links in the source in the :active state */
106 define('GESHI_ACTIVE', 2);
107 /** Links in the source in the :visited state */
108 define('GESHI_VISITED', 3);
109 
110 // Important string starter/finisher
111 // Note that if you change these, they should be as-is: i.e., don't
112 // write them as if they had been run through htmlentities()
113 /** The starter for important parts of the source */
114 define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
115 /** The ender for important parts of the source */
116 define('GESHI_END_IMPORTANT', '<END GeSHi>');
117 
118 /**#@+
119  *  @access private
120  */
121 // When strict mode applies for a language
122 /** Strict mode never applies (this is the most common) */
123 define('GESHI_NEVER', 0);
124 /** Strict mode *might* apply, and can be enabled or
125     disabled by {@link GeSHi->enable_strict_mode()} */
126 define('GESHI_MAYBE', 1);
127 /** Strict mode always applies */
128 define('GESHI_ALWAYS', 2);
129 
130 // Advanced regexp handling constants, used in language files
131 /** The key of the regex array defining what to search for */
132 define('GESHI_SEARCH', 0);
133 /** The key of the regex array defining what bracket group in a
134     matched search to use as a replacement */
135 define('GESHI_REPLACE', 1);
136 /** The key of the regex array defining any modifiers to the regular expression */
137 define('GESHI_MODIFIERS', 2);
138 /** The key of the regex array defining what bracket group in a
139     matched search to put before the replacement */
140 define('GESHI_BEFORE', 3);
141 /** The key of the regex array defining what bracket group in a
142     matched search to put after the replacement */
143 define('GESHI_AFTER', 4);
144 /** The key of the regex array defining a custom keyword to use
145     for this regexp's html tag class */
146 define('GESHI_CLASS', 5);
147 
148 /** Used in language files to mark comments */
149 define('GESHI_COMMENTS', 0);
150 
151 /** some old PHP / PCRE versions only support up to xxx subpatterns in
152     regular expressions. Set this to false if your PCRE lib is up to date
153     @see GeSHi->optimize_regexp_list()
154     **/
155 define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
156 /** it's also important not to generate too long regular expressions
157     be generous here... but keep in mind, that when reaching this limit we
158     still have to close open patterns. 12k should do just fine on a 16k limit.
159     @see GeSHi->optimize_regexp_list()
160     **/
161 define('GESHI_MAX_PCRE_LENGTH', 12288);
162 
163 //Number format specification
164 /** Basic number format for integers */
165 define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
166 /** Enhanced number format for integers like seen in C */
167 define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
168 /** Number format to highlight binary numbers with a suffix "b" */
169 define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
170 /** Number format to highlight binary numbers with a prefix % */
171 define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
172 /** Number format to highlight binary numbers with a prefix 0b (C) */
173 define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
174 /** Number format to highlight octal numbers with a leading zero */
175 define('GESHI_NUMBER_OCT_PREFIX', 256);           //0[0-7]+
176 /** Number format to highlight octal numbers with a prefix 0o (logtalk) */
177 define('GESHI_NUMBER_OCT_PREFIX_0O', 512);           //0o[0-7]+
178 /** Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series). */
179 define('GESHI_NUMBER_OCT_PREFIX_AT', 1024);           //@[0-7]+
180 /** Number format to highlight octal numbers with a suffix of o */
181 define('GESHI_NUMBER_OCT_SUFFIX', 2048);           //[0-7]+[oO]
182 /** Number format to highlight hex numbers with a prefix 0x */
183 define('GESHI_NUMBER_HEX_PREFIX', 4096);           //0x[0-9a-fA-F]+
184 /** Number format to highlight hex numbers with a prefix $ */
185 define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192);           //$[0-9a-fA-F]+
186 /** Number format to highlight hex numbers with a suffix of h */
187 define('GESHI_NUMBER_HEX_SUFFIX', 16384);           //[0-9][0-9a-fA-F]*h
188 /** Number format to highlight floating-point numbers without support for scientific notation */
189 define('GESHI_NUMBER_FLT_NONSCI', 65536);          //\d+\.\d+
190 /** Number format to highlight floating-point numbers with a suffix f and without support for scientific notation */
191 define('GESHI_NUMBER_FLT_NONSCI_F', 131072);       //\d+(\.\d+)?f
192 /** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
193 define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);      //\.\d+e\d+
194 /** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
195 define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);       //\d+(\.\d+)?e\d+
196 //Custom formats are passed by RX array
197 
198 // Error detection - use these to analyse faults
199 /** No sourcecode to highlight was specified
200  * @deprecated
201  */
202 define('GESHI_ERROR_NO_INPUT', 1);
203 /** The language specified does not exist */
204 define('GESHI_ERROR_NO_SUCH_LANG', 2);
205 /** GeSHi could not open a file for reading (generally a language file) */
206 define('GESHI_ERROR_FILE_NOT_READABLE', 3);
207 /** The header type passed to {@link GeSHi->set_header_type()} was invalid */
208 define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
209 /** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
210 define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
211 /**#@-*/
212 
213 
214 /**
215  * The GeSHi Class.
216  *
217  * Please refer to the documentation for GeSHi 1.0.X that is available
218  * at http://qbnz.com/highlighter/documentation.php for more information
219  * about how to use this class.
220  *
221  * @package   geshi
222  * @author    Nigel McNie <nigel@geshi.org>
223  * @author    Benny Baumann <BenBE@omorphia.de>
224  * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2014 Benny Baumann
225  */
226 class GeSHi {
227 
228     /**
229      * The source code to highlight
230      * @var string
231      */
232     protected $source = '';
233 
234     /**
235      * The language to use when highlighting
236      * @var string
237      */
238     protected $language = '';
239 
240     /**
241      * The data for the language used
242      * @var array
243      */
244     protected $language_data = array();
245 
246     /**
247      * The path to the language files
248      * @var string
249      */
250     protected $language_path = GESHI_LANG_ROOT;
251 
252     /**
253      * The error state: false if no error has occurred, otherwise an error code or message
254      * @var int|string|false
255      * @todo check err reporting works
256      */
257     protected $error = false;
258 
259     /**
260      * Possible error messages
261      * @var array
262      */
263     protected $error_messages = array(
264         GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
265         GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
266         GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
267         GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
268     );
269 
270     /**
271      * Whether highlighting is strict or not
272      * @var boolean
273      */
274     protected $strict_mode = false;
275 
276     /**
277      * Whether to use CSS classes in output
278      * @var boolean
279      */
280     protected $use_classes = false;
281 
282     /**
283      * The type of header to use. Can be one of the following
284      * values:
285      *
286      * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
287      * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
288      * - GESHI_HEADER_NONE: No header is outputted.
289      *
290      * @var int
291      */
292     protected $header_type = GESHI_HEADER_PRE;
293 
294     /**
295      * Array of permissions for which lexics should be highlighted
296      * @var array
297      */
298     protected $lexic_permissions = array(
299         'KEYWORDS' =>    array(),
300         'COMMENTS' =>    array('MULTI' => true),
301         'REGEXPS' =>     array(),
302         'ESCAPE_CHAR' => true,
303         'BRACKETS' =>    true,
304         'SYMBOLS' =>     false,
305         'STRINGS' =>     true,
306         'NUMBERS' =>     true,
307         'METHODS' =>     true,
308         'SCRIPT' =>      true
309     );
310 
311     /**
312      * The time it took to parse the code
313      * @var double
314      */
315     protected $time = 0;
316 
317     /**
318      * The content of the header block
319      * @var string
320      */
321     protected $header_content = '';
322 
323     /**
324      * The content of the footer block
325      * @var string
326      */
327     protected $footer_content = '';
328 
329     /**
330      * The style of the header block
331      * @var string
332      */
333     protected $header_content_style = '';
334 
335     /**
336      * The style of the footer block
337      * @var string
338      */
339     protected $footer_content_style = '';
340 
341     /**
342      * Tells if a block around the highlighted source should be forced
343      * if not using line numbering
344      * @var boolean
345      */
346     protected $force_code_block = false;
347 
348     /**
349      * The styles for hyperlinks in the code
350      * @var array
351      */
352     protected $link_styles = array();
353 
354     /**
355      * Whether important blocks should be recognised or not
356      * @var boolean
357      * @deprecated
358      * @todo REMOVE THIS FUNCTIONALITY!
359      */
360     protected $enable_important_blocks = false;
361 
362     /**
363      * Styles for important parts of the code
364      * @var string
365      * @deprecated
366      * @todo As above - rethink the whole idea of important blocks as it is buggy and
367      * will be hard to implement in 1.2
368      */
369     protected $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
370 
371     /**
372      * Whether CSS IDs should be added to the code
373      * @var boolean
374      */
375     protected $add_ids = false;
376 
377     /**
378      * Lines that should be highlighted extra
379      * @var array
380      */
381     protected $highlight_extra_lines = array();
382 
383     /**
384      * Styles of lines that should be highlighted extra
385      * @var array
386      */
387     protected $highlight_extra_lines_styles = array();
388 
389     /**
390      * Styles of extra-highlighted lines
391      * @var string
392      */
393     protected $highlight_extra_lines_style = 'background-color: #ffc;';
394 
395     /**
396      * The line ending
397      * If null, nl2br() will be used on the result string.
398      * Otherwise, all instances of \n will be replaced with $line_ending
399      * @var string
400      */
401     protected $line_ending = null;
402 
403     /**
404      * Number at which line numbers should start at
405      * @var int
406      */
407     protected $line_numbers_start = 1;
408 
409     /**
410      * The overall style for this code block
411      * @var string
412      */
413     protected $overall_style = 'font-family:monospace;';
414 
415     /**
416      *  The style for the actual code
417      * @var string
418      */
419     protected $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
420 
421     /**
422      * The overall class for this code block
423      * @var string
424      */
425     protected $overall_class = '';
426 
427     /**
428      * The overall ID for this code block
429      * @var string
430      */
431     protected $overall_id = '';
432 
433     /**
434      * Line number styles
435      * @var string
436      */
437     protected $line_style1 = 'font-weight: normal; vertical-align:top;';
438 
439     /**
440      * Line number styles for fancy lines
441      * @var string
442      */
443     protected $line_style2 = 'font-weight: bold; vertical-align:top;';
444 
445     /**
446      * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
447      * @var string
448      */
449     protected $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
450 
451     /**
452      * Flag for how line numbers are displayed
453      * @var int
454      */
455     protected $line_numbers = GESHI_NO_LINE_NUMBERS;
456 
457     /**
458      * Flag to decide if multi line spans are allowed. Set it to false to make sure
459      * each tag is closed before and reopened after each linefeed.
460      * @var boolean
461      */
462     protected $allow_multiline_span = true;
463 
464     /**
465      * The "nth" value for fancy line highlighting
466      * @var int
467      */
468     protected $line_nth_row = 0;
469 
470     /**
471      * The size of tab stops
472      * @var int
473      */
474     protected $tab_width = 8;
475 
476     /**
477      * Should we use language-defined tab stop widths?
478      * @var boolean
479      */
480     protected $use_language_tab_width = false;
481 
482     /**
483      * Default target for keyword links
484      * @var string
485      */
486     protected $link_target = '';
487 
488     /**
489      * The encoding to use for entity encoding
490      * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
491      * @var string
492      */
493     protected $encoding = 'utf-8';
494 
495     /**
496      * Should keywords be linked?
497      * @var boolean
498      */
499     protected $keyword_links = true;
500 
501     /**
502      * Currently loaded language file
503      * @var    string
504      * @since 1.0.7.22
505      */
506     protected $loaded_language = '';
507 
508     /**
509      * Whether the caches needed for parsing are built or not
510      *
511      * @var   bool
512      * @since 1.0.8
513      */
514     protected $parse_cache_built = false;
515 
516     /**
517      * Work around for Suhosin Patch with disabled /e modifier
518      *
519      * Note from suhosins author in config file:
520      * <blockquote>
521      *   The /e modifier inside <code>preg_replace()</code> allows code execution.
522      *   Often it is the cause for remote code execution exploits. It is wise to
523      *   deactivate this feature and test where in the application it is used.
524      *   The developer using the /e modifier should be made aware that he should
525      *   use <code>preg_replace_callback()</code> instead
526      * </blockquote>
527      *
528      * @var   int
529      * @since 1.0.8
530      */
531     protected $_kw_replace_group = 0;
532     protected $_rx_key = 0;
533 
534     /**
535      * some "callback parameters" for handle_multiline_regexps
536      *
537      * @since  1.0.8
538      * @access private
539      * @var    string
540      */
541     protected $_hmr_before = '';
542     protected $_hmr_replace = '';
543     protected $_hmr_after = '';
544     protected $_hmr_key = 0;
545 
546     /**
547      * Creates a new GeSHi object, with source and language
548      *
549      * @param string $source   The source code to highlight
550      * @param string $language The language to highlight the source with
551      * @param string $path     The path to the language file directory. <b>This
552      *               is deprecated!</b> I've backported the auto path
553      *               detection from the 1.1.X dev branch, so now it
554      *               should be automatically set correctly. If you have
555      *               renamed the language directory however, you will
556      *               still need to set the path using this parameter or
557      *               {@link GeSHi->set_language_path()}
558      * @since 1.0.0
559      */
560     public function __construct($source = '', $language = '', $path = '') {
561         if ( is_string($source) && ($source !== '') ) {
562             $this->set_source($source);
563         }
564         if ( is_string($language) && ($language !== '') ) {
565             $this->set_language($language);
566         }
567         $this->set_language_path($path);
568     }
569 
570     /**
571      * Returns the version of GeSHi
572      *
573      * @return string
574      * @since  1.0.8.11
575      */
576     public function get_version()
577     {
578         return GESHI_VERSION;
579     }
580 
581     /**
582      * Returns an error message associated with the last GeSHi operation,
583      * or false if no error has occurred
584      *
585      * @return string|false An error message if there has been an error, else false
586      * @since  1.0.0
587      */
588     public function error() {
589         if ($this->error) {
590             //Put some template variables for debugging here ...
591             $debug_tpl_vars = array(
592                 '{LANGUAGE}' => $this->language,
593                 '{PATH}' => $this->language_path
594             );
595             $msg = str_replace(
596                 array_keys($debug_tpl_vars),
597                 array_values($debug_tpl_vars),
598                 $this->error_messages[$this->error]);
599 
600             return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
601         }
602         return false;
603     }
604 
605     /**
606      * Gets a human-readable language name (thanks to Simon Patterson
607      * for the idea :))
608      *
609      * @return string The name for the current language
610      * @since  1.0.2
611      */
612     public function get_language_name() {
613         if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
614             return $this->language_data['LANG_NAME'] . ' (Unknown Language)';
615         }
616         return $this->language_data['LANG_NAME'];
617     }
618 
619     /**
620      * Sets the source code for this object
621      *
622      * @param string $source The source code to highlight
623      * @since 1.0.0
624      */
625     public function set_source($source) {
626         $this->source = $source;
627         $this->highlight_extra_lines = array();
628     }
629 
630     /**
631      * Clean up the language name to prevent malicious code injection
632      *
633      * @param string $language The name of the language to strip
634      * @since 1.0.9.1
635      */
636     public function strip_language_name($language) {
637         $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
638         $language = strtolower($language);
639 
640         return $language;
641     }
642 
643     /**
644      * Sets the language for this object
645      *
646      * @note since 1.0.8 this function won't reset language-settings by default anymore!
647      *       if you need this set $force_reset = true
648      *
649      * @param string $language    The name of the language to use
650      * @param bool   $force_reset
651      * @since 1.0.0
652      */
653     public function set_language($language, $force_reset = false) {
654         $this->error = false;
655         $this->strict_mode = GESHI_NEVER;
656 
657         if ($force_reset) {
658             $this->loaded_language = false;
659         }
660 
661         //Clean up the language name to prevent malicious code injection
662         $language = $this->strip_language_name($language);
663 
664         //Retreive the full filename
665         $file_name = $this->language_path . $language . '.php';
666         if ($file_name == $this->loaded_language) {
667             // this language is already loaded!
668             return;
669         }
670 
671         $this->language = $language;
672 
673         //Check if we can read the desired file
674         if (!is_readable($file_name)) {
675             $this->error = GESHI_ERROR_NO_SUCH_LANG;
676             return;
677         }
678 
679         // Load the language for parsing
680         $this->load_language($file_name);
681     }
682 
683     /**
684      * Sets the path to the directory containing the language files. Note
685      * that this path is relative to the directory of the script that included
686      * geshi.php, NOT geshi.php itself.
687      *
688      * @param string $path The path to the language directory
689      * @since 1.0.0
690      * @deprecated The path to the language files should now be automatically
691      *             detected, so this method should no longer be needed. The
692      *             1.1.X branch handles manual setting of the path differently
693      *             so this method will disappear in 1.2.0.
694      */
695     public function set_language_path($path) {
696         if(strpos($path,':')) {
697             //Security Fix to prevent external directories using fopen wrappers.
698             if(DIRECTORY_SEPARATOR == "\\") {
699                 if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
700                     return;
701                 }
702             } else {
703                 return;
704             }
705         }
706         if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
707             //Security Fix to prevent external directories using fopen wrappers.
708             return;
709         }
710         if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
711             //Security Fix to prevent external directories using fopen wrappers.
712             return;
713         }
714         if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
715             //Security Fix to prevent external directories using fopen wrappers.
716             return;
717         }
718         if ($path) {
719             $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
720             $this->set_language($this->language); // otherwise set_language_path has no effect
721         }
722     }
723 
724     /**
725      * Get supported langs or an associative array lang=>full_name.
726      * @param boolean $full_names
727      * @return array
728      */
729     public function get_supported_languages($full_names=false)
730     {
731         // return array
732         $back = array();
733 
734         // we walk the lang root
735         $dir = dir($this->language_path);
736 
737         // foreach entry
738         while (false !== ($entry = $dir->read()))
739         {
740             $full_path = $this->language_path.$entry;
741 
742             // Skip all dirs
743             if (is_dir($full_path)) {
744                 continue;
745             }
746 
747             // we only want lang.php files
748             if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
749                 continue;
750             }
751 
752             // Raw lang name is here
753             $langname = $matches[1];
754 
755             // We want the fullname too?
756             if ($full_names === true)
757             {
758                 if (false !== ($fullname = $this->get_language_fullname($langname)))
759                 {
760                     $back[$langname] = $fullname; // we go associative
761                 }
762             }
763             else
764             {
765                 // just store raw langname
766                 $back[] = $langname;
767             }
768         }
769 
770         $dir->close();
771 
772         return $back;
773     }
774 
775     /**
776      * Get full_name for a lang or false.
777      * @param string $language short langname (html4strict for example)
778      * @return mixed
779      */
780     public function get_language_fullname($language)
781     {
782         //Clean up the language name to prevent malicious code injection
783         $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
784 
785         $language = strtolower($language);
786 
787         // get fullpath-filename for a langname
788         $fullpath = $this->language_path.$language.'.php';
789 
790         // we need to get contents :S
791         if (false === ($data = file_get_contents($fullpath))) {
792             $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
793             return false;
794         }
795 
796         // match the langname
797         if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\']|\\\')+?)\'/', $data, $matches)) {
798             $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
799             return false;
800         }
801 
802         // return fullname for langname
803         return stripcslashes($matches[1]);
804     }
805 
806     /**
807      * Sets the type of header to be used.
808      *
809      * If GESHI_HEADER_DIV is used, the code is surrounded in a "div".This
810      * means more source code but more control over tab width and line-wrapping.
811      * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
812      * control. Default is GESHI_HEADER_PRE.
813      *
814      * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
815      * should be outputted.
816      *
817      * @param int $type The type of header to be used
818      * @since 1.0.0
819      */
820     public function set_header_type($type) {
821         //Check if we got a valid header type
822         if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
823             GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) {
824             $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
825             return;
826         }
827 
828         //Set that new header type
829         $this->header_type = $type;
830     }
831 
832     /**
833      * Sets the styles for the code that will be outputted
834      * when this object is parsed. The style should be a
835      * string of valid stylesheet declarations
836      *
837      * @param string  $style             The overall style for the outputted code block
838      * @param boolean $preserve_defaults Whether to merge the styles with the current styles or not
839      * @since 1.0.0
840      */
841     public function set_overall_style($style, $preserve_defaults = false) {
842         if (!$preserve_defaults) {
843             $this->overall_style = $style;
844         } else {
845             $this->overall_style .= $style;
846         }
847     }
848 
849     /**
850      * Sets the overall classname for this block of code. This
851      * class can then be used in a stylesheet to style this object's
852      * output
853      *
854      * @param string $class The class name to use for this block of code
855      * @since 1.0.0
856      */
857     public function set_overall_class($class) {
858         $this->overall_class = $class;
859     }
860 
861     /**
862      * Sets the overall id for this block of code. This id can then
863      * be used in a stylesheet to style this object's output
864      *
865      * @param string $id The ID to use for this block of code
866      * @since 1.0.0
867      */
868     public function set_overall_id($id) {
869         $this->overall_id = $id;
870     }
871 
872     /**
873      * Sets whether CSS classes should be used to highlight the source. Default
874      * is off, calling this method with no arguments will turn it on
875      *
876      * @param boolean $flag Whether to turn classes on or not
877      * @since 1.0.0
878      */
879     public function enable_classes($flag = true) {
880         $this->use_classes = ($flag) ? true : false;
881     }
882 
883     /**
884      * Sets the style for the actual code. This should be a string
885      * containing valid stylesheet declarations. If $preserve_defaults is
886      * true, then styles are merged with the default styles, with the
887      * user defined styles having priority
888      *
889      * Note: Use this method to override any style changes you made to
890      * the line numbers if you are using line numbers, else the line of
891      * code will have the same style as the line number! Consult the
892      * GeSHi documentation for more information about this.
893      *
894      * @param string  $style             The style to use for actual code
895      * @param boolean $preserve_defaults Whether to merge the current styles with the new styles
896      * @since 1.0.2
897      */
898     public function set_code_style($style, $preserve_defaults = false) {
899         if (!$preserve_defaults) {
900             $this->code_style = $style;
901         } else {
902             $this->code_style .= $style;
903         }
904     }
905 
906     /**
907      * Sets the styles for the line numbers.
908      *
909      * @param string         $style1 The style for the line numbers that are "normal"
910      * @param string|boolean $style2 If a string, this is the style of the line
911      *        numbers that are "fancy", otherwise if boolean then this
912      *        defines whether the normal styles should be merged with the
913      *        new normal styles or not
914      * @param boolean        $preserve_defaults If set, is the flag for whether to merge the "fancy"
915      *        styles with the current styles or not
916      * @since 1.0.2
917      */
918     public function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
919         //Check if we got 2 or three parameters
920         if (is_bool($style2)) {
921             $preserve_defaults = $style2;
922             $style2 = '';
923         }
924 
925         //Actually set the new styles
926         if (!$preserve_defaults) {
927             $this->line_style1 = $style1;
928             $this->line_style2 = $style2;
929         } else {
930             $this->line_style1 .= $style1;
931             $this->line_style2 .= $style2;
932         }
933     }
934 
935     /**
936      * Sets whether line numbers should be displayed.
937      *
938      * Valid values for the first parameter are:
939      *
940      *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
941      *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
942      *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
943      *
944      * For fancy line numbers, the second parameter is used to signal which lines
945      * are to be fancy. For example, if the value of this parameter is 5 then every
946      * 5th line will be fancy.
947      *
948      * @param int $flag    How line numbers should be displayed
949      * @param int $nth_row Defines which lines are fancy
950      * @since 1.0.0
951      */
952     public function enable_line_numbers($flag, $nth_row = 5) {
953         if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
954             && GESHI_FANCY_LINE_NUMBERS != $flag) {
955             $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
956         }
957         $this->line_numbers = $flag;
958         $this->line_nth_row = $nth_row;
959     }
960 
961     /**
     * Sets whether spans and other HTML markup generated by GeSHi can
     * span over multiple lines or not. Defaults to true to reduce overhead.
     * Set it to false if you want to manipulate the output or manually display
     * the code in an ordered list.
     *
     * @param boolean $flag Whether multiline spans are allowed or not
968      * @since 1.0.7.22
969      */
970     public function enable_multiline_span($flag) {
971         $this->allow_multiline_span = (bool) $flag;
972     }
973 
974     /**
975      * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
976      *
977      * @see enable_multiline_span
978      * @return bool
979      */
980     public function get_multiline_span() {
981         return $this->allow_multiline_span;
982     }
983 
984     /**
985      * Sets the style for a keyword group. If $preserve_defaults is
986      * true, then styles are merged with the default styles, with the
987      * user defined styles having priority
988      *
989      * @param int     $key               The key of the keyword group to change the styles of
990      * @param string  $style             The style to make the keywords
991      * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
992      *                to overwrite them
993      * @since 1.0.0
994      */
995     public function set_keyword_group_style($key, $style, $preserve_defaults = false) {
996         //Set the style for this keyword group
997         if('*' == $key) {
998             foreach($this->language_data['STYLES']['KEYWORDS'] as $_key => $_value) {
999                 if (!$preserve_defaults) {
1000                     $this->language_data['STYLES']['KEYWORDS'][$_key] = $style;
1001                 } else {
1002                     $this->language_data['STYLES']['KEYWORDS'][$_key] .= $style;
1003                 }
1004             }
1005         } else {
1006             if (!$preserve_defaults) {
1007                 $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
1008             } else {
1009                 $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
1010             }
1011         }
1012 
1013         //Update the lexic permissions
1014         if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
1015             $this->lexic_permissions['KEYWORDS'][$key] = true;
1016         }
1017     }
1018 
1019     /**
1020      * Turns highlighting on/off for a keyword group
1021      *
1022      * @param int     $key  The key of the keyword group to turn on or off
1023      * @param boolean $flag Whether to turn highlighting for that group on or off
1024      * @since 1.0.0
1025      */
1026     public function set_keyword_group_highlighting($key, $flag = true) {
1027         $this->lexic_permissions['KEYWORDS'][$key] = ($flag) ? true : false;
1028     }
1029 
1030     /**
1031      * Sets the styles for comment groups.  If $preserve_defaults is
1032      * true, then styles are merged with the default styles, with the
1033      * user defined styles having priority
1034      *
1035      * @param int     $key               The key of the comment group to change the styles of
1036      * @param string  $style             The style to make the comments
1037      * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1038      *                to overwrite them
1039      * @since 1.0.0
1040      */
1041     public function set_comments_style($key, $style, $preserve_defaults = false) {
1042         if('*' == $key) {
1043             foreach($this->language_data['STYLES']['COMMENTS'] as $_key => $_value) {
1044                 if (!$preserve_defaults) {
1045                     $this->language_data['STYLES']['COMMENTS'][$_key] = $style;
1046                 } else {
1047                     $this->language_data['STYLES']['COMMENTS'][$_key] .= $style;
1048                 }
1049             }
1050         } else {
1051             if (!$preserve_defaults) {
1052                 $this->language_data['STYLES']['COMMENTS'][$key] = $style;
1053             } else {
1054                 $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
1055             }
1056         }
1057     }
1058 
1059     /**
1060      * Turns highlighting on/off for comment groups
1061      *
1062      * @param int     $key  The key of the comment group to turn on or off
1063      * @param boolean $flag Whether to turn highlighting for that group on or off
1064      * @since 1.0.0
1065      */
1066     public function set_comments_highlighting($key, $flag = true) {
1067         $this->lexic_permissions['COMMENTS'][$key] = ($flag) ? true : false;
1068     }
1069 
1070     /**
1071      * Sets the styles for escaped characters. If $preserve_defaults is
1072      * true, then styles are merged with the default styles, with the
1073      * user defined styles having priority
1074      *
1075      * @param string  $style             The style to make the escape characters
1076      * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1077      *                                   to overwrite them
1078      * @param int     $group             Tells the group of symbols for which style should be set.
1079      * @since 1.0.0
1080      */
1081     public function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
1082         if (!$preserve_defaults) {
1083             $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
1084         } else {
1085             $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
1086         }
1087     }
1088 
1089     /**
1090      * Turns highlighting on/off for escaped characters
1091      *
1092      * @param boolean $flag Whether to turn highlighting for escape characters on or off
1093      * @since 1.0.0
1094      */
1095     public function set_escape_characters_highlighting($flag = true) {
1096         $this->lexic_permissions['ESCAPE_CHAR'] = ($flag) ? true : false;
1097     }
1098 
1099     /**
1100      * Sets the styles for brackets. If $preserve_defaults is
1101      * true, then styles are merged with the default styles, with the
1102      * user defined styles having priority
1103      *
1104      * This method is DEPRECATED: use set_symbols_style instead.
1105      * This method will be removed in 1.2.X
1106      *
1107      * @param string  $style             The style to make the brackets
1108      * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1109      *                to overwrite them
1110      * @since 1.0.0
1111      * @deprecated In favour of set_symbols_style
1112      */
1113     public function set_brackets_style($style, $preserve_defaults = false) {
1114         if (!$preserve_defaults) {
1115             $this->language_data['STYLES']['BRACKETS'][0] = $style;
1116         } else {
1117             $this->language_data['STYLES']['BRACKETS'][0] .= $style;
1118         }
1119     }
1120 
1121     /**
1122      * Turns highlighting on/off for brackets
1123      *
1124      * This method is DEPRECATED: use set_symbols_highlighting instead.
     * This method will be removed in 1.2.X
1126      *
1127      * @param boolean $flag Whether to turn highlighting for brackets on or off
1128      * @since 1.0.0
1129      * @deprecated In favour of set_symbols_highlighting
1130      */
1131     public function set_brackets_highlighting($flag) {
1132         $this->lexic_permissions['BRACKETS'] = ($flag) ? true : false;
1133     }
1134 
1135     /**
1136      * Sets the styles for symbols. If $preserve_defaults is
1137      * true, then styles are merged with the default styles, with the
1138      * user defined styles having priority
1139      *
1140      * @param string  $style             The style to make the symbols
1141      * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1142      *                                   to overwrite them
1143      * @param int     $group             Tells the group of symbols for which style should be set.
1144      * @since 1.0.1
1145      */
1146     public function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
1147         // Update the style of symbols
1148         if (!$preserve_defaults) {
1149             $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
1150         } else {
1151             $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
1152         }
1153 
1154         // For backward compatibility
1155         if (0 == $group) {
1156             $this->set_brackets_style ($style, $preserve_defaults);
1157         }
1158     }
1159 
1160     /**
1161      * Turns highlighting on/off for symbols
1162      *
1163      * @param boolean $flag Whether to turn highlighting for symbols on or off
1164      * @since 1.0.0
1165      */
1166     public function set_symbols_highlighting($flag) {
1167         // Update lexic permissions for this symbol group
1168         $this->lexic_permissions['SYMBOLS'] = ($flag) ? true : false;
1169 
1170         // For backward compatibility
1171         $this->set_brackets_highlighting ($flag);
1172     }
1173 
1174     /**
1175      * Sets the styles for strings. If $preserve_defaults is
1176      * true, then styles are merged with the default styles, with the
1177      * user defined styles having priority
1178      *
     * @param string  $style             The style to make the strings
1180      * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1181      *                                   to overwrite them
1182      * @param int     $group             Tells the group of strings for which style should be set.
1183      * @since 1.0.0
1184      */
1185     public function set_strings_style($style, $preserve_defaults = false, $group = 0) {
1186         if (!$preserve_defaults) {
1187             $this->language_data['STYLES']['STRINGS'][$group] = $style;
1188         } else {
1189             $this->language_data['STYLES']['STRINGS'][$group] .= $style;
1190         }
1191     }
1192 
1193     /**
1194      * Turns highlighting on/off for strings
1195      *
1196      * @param boolean $flag Whether to turn highlighting for strings on or off
1197      * @since 1.0.0
1198      */
1199     public function set_strings_highlighting($flag) {
1200         $this->lexic_permissions['STRINGS'] = ($flag) ? true : false;
1201     }
1202 
1203     /**
1204      * Sets the styles for strict code blocks. If $preserve_defaults is
1205      * true, then styles are merged with the default styles, with the
1206      * user defined styles having priority
1207      *
1208      * @param string  $style             The style to make the script blocks
1209      * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1210      *                                   to overwrite them
1211      * @param int     $group             Tells the group of script blocks for which style should be set.
1212      * @since 1.0.8.4
1213      */
1214     public function set_script_style($style, $preserve_defaults = false, $group = 0) {
1215         // Update the style of symbols
1216         if (!$preserve_defaults) {
1217             $this->language_data['STYLES']['SCRIPT'][$group] = $style;
1218         } else {
1219             $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
1220         }
1221     }
1222 
1223     /**
1224      * Sets the styles for numbers. If $preserve_defaults is
1225      * true, then styles are merged with the default styles, with the
1226      * user defined styles having priority
1227      *
1228      * @param string  $style             The style to make the numbers
1229      * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1230      *                                   to overwrite them
1231      * @param int     $group             Tells the group of numbers for which style should be set.
1232      * @since 1.0.0
1233      */
1234     public function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
1235         if (!$preserve_defaults) {
1236             $this->language_data['STYLES']['NUMBERS'][$group] = $style;
1237         } else {
1238             $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
1239         }
1240     }
1241 
1242     /**
1243      * Turns highlighting on/off for numbers
1244      *
1245      * @param boolean $flag Whether to turn highlighting for numbers on or off
1246      * @since 1.0.0
1247      */
1248     public function set_numbers_highlighting($flag) {
1249         $this->lexic_permissions['NUMBERS'] = ($flag) ? true : false;
1250     }
1251 
1252     /**
1253      * Sets the styles for methods. $key is a number that references the
1254      * appropriate "object splitter" - see the language file for the language
1255      * you are highlighting to get this number. If $preserve_defaults is
1256      * true, then styles are merged with the default styles, with the
1257      * user defined styles having priority
1258      *
1259      * @param int     $key               The key of the object splitter to change the styles of
1260      * @param string  $style             The style to make the methods
1261      * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1262      *                                   to overwrite them
1263      * @since 1.0.0
1264      */
1265     public function set_methods_style($key, $style, $preserve_defaults = false) {
1266         if (!$preserve_defaults) {
1267             $this->language_data['STYLES']['METHODS'][$key] = $style;
1268         } else {
1269             $this->language_data['STYLES']['METHODS'][$key] .= $style;
1270         }
1271     }
1272 
1273     /**
1274      * Turns highlighting on/off for methods
1275      *
1276      * @param boolean $flag Whether to turn highlighting for methods on or off
1277      * @since 1.0.0
1278      */
1279     public function set_methods_highlighting($flag) {
1280         $this->lexic_permissions['METHODS'] = ($flag) ? true : false;
1281     }
1282 
1283     /**
1284      * Sets the styles for regexps. If $preserve_defaults is
1285      * true, then styles are merged with the default styles, with the
1286      * user defined styles having priority
1287      *
     * @param int     $key               The key of the regular expression group to change the styles of
     * @param string  $style             The style to make the regular expression matches
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                   to overwrite them
1293      * @since 1.0.0
1294      */
1295     public function set_regexps_style($key, $style, $preserve_defaults = false) {
1296         if (!$preserve_defaults) {
1297             $this->language_data['STYLES']['REGEXPS'][$key] = $style;
1298         } else {
1299             $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
1300         }
1301     }
1302 
1303     /**
1304      * Turns highlighting on/off for regexps
1305      *
1306      * @param int     $key  The key of the regular expression group to turn on or off
1307      * @param boolean $flag Whether to turn highlighting for the regular expression group on or off
1308      * @since 1.0.0
1309      */
1310     public function set_regexps_highlighting($key, $flag) {
1311         $this->lexic_permissions['REGEXPS'][$key] = ($flag) ? true : false;
1312     }
1313 
1314     /**
1315      * Sets whether a set of keywords are checked for in a case sensitive manner
1316      *
1317      * @param int     $key  The key of the keyword group to change the case sensitivity of
1318      * @param boolean $case Whether to check in a case sensitive manner or not
1319      * @since 1.0.0
1320      */
1321     public function set_case_sensitivity($key, $case) {
1322         $this->language_data['CASE_SENSITIVE'][$key] = ($case) ? true : false;
1323     }
1324 
1325     /**
1326      * Sets the case that keywords should use when found. Use the constants:
1327      *
1328      *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
1329      *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
1330      *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
1331      *
1332      * @param int $case A constant specifying what to do with matched keywords
1333      * @since 1.0.1
1334      */
1335     public function set_case_keywords($case) {
1336         if (in_array($case, array(
1337             GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER))) {
1338             $this->language_data['CASE_KEYWORDS'] = $case;
1339         }
1340     }
1341 
1342     /**
1343      * Sets how many spaces a tab is substituted for
1344      *
     * Widths below one are replaced by the default width of 8
1346      *
1347      * @param int $width The tab width
1348      * @since 1.0.0
1349      */
1350     public function set_tab_width($width) {
1351         $this->tab_width = intval($width);
1352 
1353         //Check if it fit's the constraints:
1354         if ($this->tab_width < 1) {
1355             //Return it to the default
1356             $this->tab_width = 8;
1357         }
1358     }
1359 
1360     /**
     * Sets whether or not to use the tab-stop width specified by the language
1362      *
1363      * @param boolean $use Whether to use language-specific tab-stop widths
1364      * @since 1.0.7.20
1365      */
1366     public function set_use_language_tab_width($use) {
1367         $this->use_language_tab_width = (bool) $use;
1368     }
1369 
1370     /**
1371      * Returns the tab width to use, based on the current language and user
1372      * preference
1373      *
1374      * @return int Tab width
1375      * @since 1.0.7.20
1376      */
1377     public function get_real_tab_width() {
1378         if (!$this->use_language_tab_width ||
1379             !isset($this->language_data['TAB_WIDTH'])) {
1380             return $this->tab_width;
1381         } else {
1382             return $this->language_data['TAB_WIDTH'];
1383         }
1384     }
1385 
1386     /**
1387      * Enables/disables strict highlighting. Default is off, calling this
1388      * method without parameters will turn it on. See documentation
1389      * for more details on strict mode and where to use it.
1390      *
1391      * @param boolean $mode Whether to enable strict mode or not
1392      * @since 1.0.0
1393      */
1394     public function enable_strict_mode($mode = true) {
1395         if (GESHI_MAYBE == $this->language_data['STRICT_MODE_APPLIES']) {
1396             $this->strict_mode = ($mode) ? GESHI_ALWAYS : GESHI_NEVER;
1397         }
1398     }
1399 
1400     /**
1401      * Disables all highlighting
1402      *
1403      * @since 1.0.0
1404      * @todo  Rewrite with array traversal
1405      * @deprecated In favour of enable_highlighting
1406      */
1407     public function disable_highlighting() {
1408         $this->enable_highlighting(false);
1409     }
1410 
1411     /**
1412      * Enables all highlighting
1413      *
1414      * The optional flag parameter was added in version 1.0.7.21 and can be used
1415      * to enable (true) or disable (false) all highlighting.
1416      *
1417      * @since 1.0.0
1418      * @param boolean $flag A flag specifying whether to enable or disable all highlighting
1419      * @todo  Rewrite with array traversal
1420      */
1421     public function enable_highlighting($flag = true) {
1422         $flag = $flag ? true : false;
1423         foreach ($this->lexic_permissions as $key => $value) {
1424             if (is_array($value)) {
1425                 foreach ($value as $k => $v) {
1426                     $this->lexic_permissions[$key][$k] = $flag;
1427                 }
1428             } else {
1429                 $this->lexic_permissions[$key] = $flag;
1430             }
1431         }
1432 
1433         // Context blocks
1434         $this->enable_important_blocks = $flag;
1435     }
1436 
1437     /**
     * Given a file extension, this method returns either a valid geshi language
     * name, or 'text' if no language is registered for that extension
1440      *
1441      * @param string $extension The extension to get a language name for
1442      * @param array  $lookup    A lookup array to use instead of the default one
1443      * @since 1.0.5
1444      * @todo Re-think about how this method works (maybe make it private and/or make it
1445      *       a extension->lang lookup?)
1446      * @return int|string
1447      */
1448     public static function get_language_name_from_extension( $extension, $lookup = array() ) {
1449         $extension = strtolower($extension);
1450 
1451         if ( !is_array($lookup) || empty($lookup)) {
1452             $lookup = array(
1453                 '6502acme' => array( 'a', 's', 'asm', 'inc' ),
1454                 '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
1455                 '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
1456                 '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
1457                 'abap' => array('abap'),
1458                 'actionscript' => array('as'),
1459                 'ada' => array('a', 'ada', 'adb', 'ads'),
1460                 'apache' => array('conf'),
1461                 'asm' => array('ash', 'asm', 'inc'),
1462                 'asp' => array('asp'),
1463                 'bash' => array('sh'),
1464                 'bf' => array('bf'),
1465                 'c' => array('c', 'h'),
1466                 'c_mac' => array('c', 'h'),
1467                 'caddcl' => array(),
1468                 'cadlisp' => array(),
1469                 'cdfg' => array('cdfg'),
1470                 'cobol' => array('cbl'),
1471                 'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
1472                 'csharp' => array('cs'),
1473                 'css' => array('css'),
1474                 'd' => array('d'),
1475                 'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
1476                 'diff' => array('diff', 'patch'),
1477                 'dos' => array('bat', 'cmd'),
1478                 'gdb' => array('kcrash', 'crash', 'bt'),
1479                 'gettext' => array('po', 'pot'),
1480                 'gml' => array('gml'),
1481                 'gnuplot' => array('plt'),
1482                 'groovy' => array('groovy'),
1483                 'haskell' => array('hs'),
1484                 'haxe' => array('hx'),
1485                 'html4strict' => array('html', 'htm'),
1486                 'ini' => array('ini', 'desktop', 'vbp'),
1487                 'java' => array('java'),
1488                 'javascript' => array('js'),
1489                 'klonec' => array('kl1'),
1490                 'klonecpp' => array('klx'),
1491                 'latex' => array('tex'),
1492                 'lisp' => array('lisp'),
1493                 'lua' => array('lua'),
1494                 'matlab' => array('m'),
1495                 'mpasm' => array(),
1496                 'mysql' => array('sql'),
1497                 'nsis' => array(),
1498                 'objc' => array(),
1499                 'oobas' => array(),
1500                 'oracle8' => array(),
1501                 'oracle10' => array(),
1502                 'pascal' => array('pas'),
1503                 'perl' => array('pl', 'pm'),
1504                 'php' => array('php', 'php5', 'phtml', 'phps'),
1505                 'povray' => array('pov'),
1506                 'providex' => array('pvc', 'pvx'),
1507                 'prolog' => array('pl'),
1508                 'python' => array('py'),
1509                 'qbasic' => array('bi'),
1510                 'reg' => array('reg'),
1511                 'ruby' => array('rb'),
1512                 'sas' => array('sas'),
1513                 'scala' => array('scala'),
1514                 'scheme' => array('scm'),
1515                 'scilab' => array('sci'),
1516                 'smalltalk' => array('st'),
1517                 'smarty' => array(),
1518                 'tcl' => array('tcl'),
1519                 'text' => array('txt'),
1520                 'vb' => array('bas', 'ctl', 'frm'),
1521                 'vbnet' => array('vb', 'sln'),
1522                 'visualfoxpro' => array(),
1523                 'whitespace' => array('ws'),
1524                 'xml' => array('xml', 'svg', 'xrc', 'vbproj', 'csproj', 'userprefs', 'resx', 'stetic', 'settings', 'manifest', 'myapp'),
1525                 'z80' => array('z80', 'asm', 'inc')
1526             );
1527         }
1528 
1529         foreach ($lookup as $lang => $extensions) {
1530             if (in_array($extension, $extensions)) {
1531                 return $lang;
1532             }
1533         }
1534 
1535         return 'text';
1536     }
1537 
1538     /**
1539      * Given a file name, this method loads its contents in, and attempts
1540      * to set the language automatically. An optional lookup table can be
1541      * passed for looking up the language name. If not specified a default
1542      * table is used
1543      *
1544      * The language table is in the form
1545      * <pre>array(
1546      *   'lang_name' => array('extension', 'extension', ...),
1547      *   'lang_name' ...
1548      * );</pre>
1549      *
1550      * @param string $file_name The filename to load the source from
1551      * @param array  $lookup    A lookup array to use instead of the default one
1552      * @todo Complete rethink of this and above method
1553      * @since 1.0.5
1554      */
1555     public function load_from_file($file_name, $lookup = array()) {
1556         if (is_readable($file_name)) {
1557             $this->set_source(file_get_contents($file_name));
1558             $this->set_language(self::get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
1559         } else {
1560             $this->error = GESHI_ERROR_FILE_NOT_READABLE;
1561         }
1562     }
1563 
1564     /**
1565      * Adds a keyword to a keyword group for highlighting
1566      *
1567      * @param int    $key  The key of the keyword group to add the keyword to
1568      * @param string $word The word to add to the keyword group
1569      * @since 1.0.0
1570      */
1571     public function add_keyword($key, $word) {
1572         if (!is_array($this->language_data['KEYWORDS'][$key])) {
1573             $this->language_data['KEYWORDS'][$key] = array();
1574         }
1575         if (!in_array($word, $this->language_data['KEYWORDS'][$key])) {
1576             $this->language_data['KEYWORDS'][$key][] = $word;
1577 
1578             //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
1579             if ($this->parse_cache_built) {
1580                 $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
1581                 $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
1582             }
1583         }
1584     }
1585 
1586     /**
1587      * Removes a keyword from a keyword group
1588      *
1589      * @param int    $key       The key of the keyword group to remove the keyword from
1590      * @param string $word      The word to remove from the keyword group
     * @param bool   $recompile Whether to automatically recompile the optimized regexp list or not.
1592      *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
1593      *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
1594      *               or the removed keyword will stay in cache and still be highlighted! On the other hand
1595      *               it might be too expensive to recompile the regexp list for every removal if you want to
1596      *               remove a lot of keywords.
1597      * @since 1.0.0
1598      */
1599     public function remove_keyword($key, $word, $recompile = true) {
1600         $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
1601         if ($key_to_remove !== false) {
1602             unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);
1603 
1604             //NEW in 1.0.8, optionally recompile keyword group
1605             if ($recompile && $this->parse_cache_built) {
1606                 $this->optimize_keyword_group($key);
1607             }
1608         }
1609     }
1610 
1611     /**
1612      * Creates a new keyword group
1613      *
1614      * @param int     $key            The key of the keyword group to create
1615      * @param string  $styles         The styles for the keyword group
1616      * @param boolean $case_sensitive Whether the keyword group is case sensitive ornot
1617      * @param array   $words          The words to use for the keyword group
1618      * @since 1.0.0
1619      * @return bool
1620      */
1621     public function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
1622         $words = (array) $words;
1623         if  (empty($words)) {
1624             // empty word lists mess up highlighting
1625             return false;
1626         }
1627 
1628         //Add the new keyword group internally
1629         $this->language_data['KEYWORDS'][$key] = $words;
1630         $this->lexic_permissions['KEYWORDS'][$key] = true;
1631         $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
1632         $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
1633 
1634         //NEW in 1.0.8, cache keyword regexp
1635         if ($this->parse_cache_built) {
1636             $this->optimize_keyword_group($key);
1637         }
1638         return true;
1639     }
1640 
1641     /**
1642      * Removes a keyword group
1643      *
1644      * @param int $key The key of the keyword group to remove
1645      * @since 1.0.0
1646      */
1647     public function remove_keyword_group ($key) {
1648         //Remove the keyword group internally
1649         unset($this->language_data['KEYWORDS'][$key]);
1650         unset($this->lexic_permissions['KEYWORDS'][$key]);
1651         unset($this->language_data['CASE_SENSITIVE'][$key]);
1652         unset($this->language_data['STYLES']['KEYWORDS'][$key]);
1653 
1654         //NEW in 1.0.8
1655         unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
1656     }
1657 
1658     /**
1659      * compile optimized regexp list for keyword group
1660      *
1661      * @param int $key The key of the keyword group to compile & optimize
1662      * @since 1.0.8
1663      */
1664     public function optimize_keyword_group($key) {
1665         $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
1666             $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);
1667         $space_as_whitespace = false;
1668         if(isset($this->language_data['PARSER_CONTROL'])) {
1669             if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
1670                 if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
1671                     $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
1672                 }
1673                 if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1674                     if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1675                         $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
1676                     }
1677                 }
1678             }
1679         }
1680         if($space_as_whitespace) {
1681             foreach($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $rxk => $rxv) {
1682                 $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
1683                     str_replace(" ", "\\s+", $rxv);
1684             }
1685         }
1686     }
1687 
1688     /**
1689      * Sets the content of the header block
1690      *
1691      * @param string $content The content of the header block
1692      * @since 1.0.2
1693      */
1694     public function set_header_content($content) {
1695         $this->header_content = $content;
1696     }
1697 
1698     /**
1699      * Sets the content of the footer block
1700      *
1701      * @param string $content The content of the footer block
1702      * @since 1.0.2
1703      */
1704     public function set_footer_content($content) {
1705         $this->footer_content = $content;
1706     }
1707 
1708     /**
1709      * Sets the style for the header content
1710      *
1711      * @param string $style The style for the header content
1712      * @since 1.0.2
1713      */
1714     public function set_header_content_style($style) {
1715         $this->header_content_style = $style;
1716     }
1717 
1718     /**
1719      * Sets the style for the footer content
1720      *
1721      * @param string $style The style for the footer content
1722      * @since 1.0.2
1723      */
1724     public function set_footer_content_style($style) {
1725         $this->footer_content_style = $style;
1726     }
1727 
1728     /**
1729      * Sets whether to force a surrounding block around
1730      * the highlighted code or not
1731      *
1732      * @param boolean $flag Tells whether to enable or disable this feature
1733      * @since 1.0.7.20
1734      */
1735     public function enable_inner_code_block($flag) {
1736         $this->force_code_block = (bool)$flag;
1737     }
1738 
1739     /**
1740      * Sets the base URL to be used for keywords
1741      *
1742      * @param int    $group The key of the keyword group to set the URL for
1743      * @param string $url   The URL to set for the group. If {FNAME} is in
1744      *                      the url somewhere, it is replaced by the keyword
1745      *                      that the URL is being made for
1746      * @since 1.0.2
1747      */
1748     public function set_url_for_keyword_group($group, $url) {
1749         $this->language_data['URLS'][$group] = $url;
1750     }
1751 
1752     /**
1753      * Sets styles for links in code
1754      *
1755      * @param int    $type   A constant that specifies what state the style is being
1756      *                       set for - e.g. :hover or :visited
1757      * @param string $styles The styles to use for that state
1758      * @since 1.0.2
1759      */
1760     public function set_link_styles($type, $styles) {
1761         $this->link_styles[$type] = $styles;
1762     }
1763 
1764     /**
1765      * Sets the target for links in code
1766      *
1767      * @param string $target The target for links in the code, e.g. _blank
1768      * @since 1.0.3
1769      */
1770     public function set_link_target($target) {
1771         if (!$target) {
1772             $this->link_target = '';
1773         } else {
1774             $this->link_target = ' target="' . $target . '"';
1775         }
1776     }
1777 
1778     /**
1779      * Sets styles for important parts of the code
1780      *
1781      * @param string $styles The styles to use on important parts of the code
1782      * @since 1.0.2
1783      */
1784     public function set_important_styles($styles) {
1785         $this->important_styles = $styles;
1786     }
1787 
1788     /**
1789      * Sets whether context-important blocks are highlighted
1790      *
1791      * @param boolean $flag Tells whether to enable or disable highlighting of important blocks
1792      * @todo REMOVE THIS SHIZ FROM GESHI!
1793      * @deprecated
1794      * @since 1.0.2
1795      */
1796     public function enable_important_blocks($flag) {
1797         $this->enable_important_blocks = ( $flag ) ? true : false;
1798     }
1799 
1800     /**
1801      * Whether CSS IDs should be added to each line
1802      *
1803      * @param boolean $flag If true, IDs will be added to each line.
1804      * @since 1.0.2
1805      */
1806     public function enable_ids($flag = true) {
1807         $this->add_ids = ($flag) ? true : false;
1808     }
1809 
1810     /**
1811      * Specifies which lines to highlight extra
1812      *
1813      * The extra style parameter was added in 1.0.7.21.
1814      *
1815      * @param mixed  $lines An array of line numbers to highlight, or just a line
1816      *                      number on its own.
1817      * @param string $style A string specifying the style to use for this line.
1818      *                      If null is specified, the default style is used.
1819      *                      If false is specified, the line will be removed from
1820      *                      special highlighting
1821      * @since 1.0.2
1822      * @todo  Some data replication here that could be cut down on
1823      */
1824     public function highlight_lines_extra($lines, $style = null) {
1825         if (is_array($lines)) {
1826             //Split up the job using single lines at a time
1827             foreach ($lines as $line) {
1828                 $this->highlight_lines_extra($line, $style);
1829             }
1830         } else {
1831             //Mark the line as being highlighted specially
1832             $lines = intval($lines);
1833             $this->highlight_extra_lines[$lines] = $lines;
1834 
1835             //Decide on which style to use
1836             if ($style === null) { //Check if we should use default style
1837                 unset($this->highlight_extra_lines_styles[$lines]);
1838             } elseif ($style === false) { //Check if to remove this line
1839                 unset($this->highlight_extra_lines[$lines]);
1840                 unset($this->highlight_extra_lines_styles[$lines]);
1841             } else {
1842                 $this->highlight_extra_lines_styles[$lines] = $style;
1843             }
1844         }
1845     }
1846 
1847     /**
1848      * Sets the style for extra-highlighted lines
1849      *
1850      * @param string $styles The style for extra-highlighted lines
1851      * @since 1.0.2
1852      */
1853     public function set_highlight_lines_extra_style($styles) {
1854         $this->highlight_extra_lines_style = $styles;
1855     }
1856 
1857     /**
1858      * Sets the line-ending
1859      *
1860      * @param string $line_ending The new line-ending
1861      * @since 1.0.2
1862      */
1863     public function set_line_ending($line_ending) {
1864         $this->line_ending = (string)$line_ending;
1865     }
1866 
1867     /**
1868      * Sets what number line numbers should start at. Should
1869      * be a positive integer, and will be converted to one.
1870      *
1871      * <b>Warning:</b> Using this method will add the "start"
1872      * attribute to the &lt;ol&gt; that is used for line numbering.
1873      * This is <b>not</b> valid XHTML strict, so if that's what you
1874      * care about then don't use this method. Firefox is getting
1875      * support for the CSS method of doing this in 1.1 and Opera
1876      * has support for the CSS method, but (of course) IE doesn't
1877      * so it's not worth doing it the CSS way yet.
1878      *
1879      * @param int $number The number to start line numbers at
1880      * @since 1.0.2
1881      */
1882     public function start_line_numbers_at($number) {
1883         $this->line_numbers_start = abs(intval($number));
1884     }
1885 
1886     /**
1887      * Sets the encoding used for htmlspecialchars(), for international
1888      * support.
1889      *
1890      * NOTE: This is not needed for now because htmlspecialchars() is not
1891      * being used (it has a security hole in PHP4 that has not been patched).
1892      * Maybe in a future version it may make a return for speed reasons, but
1893      * I doubt it.
1894      *
1895      * @param string $encoding The encoding to use for the source
1896      * @since 1.0.3
1897      */
1898     public function set_encoding($encoding) {
1899         if ($encoding) {
1900           $this->encoding = strtolower($encoding);
1901         }
1902     }
1903 
1904     /**
1905      * Turns linking of keywords on or off.
1906      *
1907      * @param boolean $enable If true, links will be added to keywords
1908      * @since 1.0.2
1909      */
1910     public function enable_keyword_links($enable = true) {
1911         $this->keyword_links = (bool) $enable;
1912     }
1913 
1914     /**
1915      * Setup caches needed for styling. This is automatically called in
1916      * parse_code() and get_stylesheet() when appropriate. This function helps
1917      * stylesheet generators as they rely on some style information being
1918      * preprocessed
1919      *
1920      * @since 1.0.8
1921      */
1922     protected function build_style_cache() {
1923         //Build the style cache needed to highlight numbers appropriate
1924         if($this->lexic_permissions['NUMBERS']) {
1925             //First check what way highlighting information for numbers are given
1926             if(!isset($this->language_data['NUMBERS'])) {
1927                 $this->language_data['NUMBERS'] = 0;
1928             }
1929 
1930             if(is_array($this->language_data['NUMBERS'])) {
1931                 $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
1932             } else {
1933                 $this->language_data['NUMBERS_CACHE'] = array();
1934                 if(!$this->language_data['NUMBERS']) {
1935                     $this->language_data['NUMBERS'] =
1936                         GESHI_NUMBER_INT_BASIC |
1937                         GESHI_NUMBER_FLT_NONSCI;
1938                 }
1939 
1940                 for($i = 0, $j = $this->language_data['NUMBERS']; $j > 0; ++$i, $j>>=1) {
1941                     //Rearrange style indices if required ...
1942                     if(isset($this->language_data['STYLES']['NUMBERS'][1<<$i])) {
1943                         $this->language_data['STYLES']['NUMBERS'][$i] =
1944                             $this->language_data['STYLES']['NUMBERS'][1<<$i];
1945                         unset($this->language_data['STYLES']['NUMBERS'][1<<$i]);
1946                     }
1947 
1948                     //Check if this bit is set for highlighting
1949                     if($j&1) {
1950                         //So this bit is set ...
1951                         //Check if it belongs to group 0 or the actual stylegroup
1952                         if(isset($this->language_data['STYLES']['NUMBERS'][$i])) {
1953                             $this->language_data['NUMBERS_CACHE'][$i] = 1 << $i;
1954                         } else {
1955                             if(!isset($this->language_data['NUMBERS_CACHE'][0])) {
1956                                 $this->language_data['NUMBERS_CACHE'][0] = 0;
1957                             }
1958                             $this->language_data['NUMBERS_CACHE'][0] |= 1 << $i;
1959                         }
1960                     }
1961                 }
1962             }
1963         }
1964     }
1965 
1966     /**
1967      * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
1968      * This function makes stylesheet generators much faster as they do not need these caches.
1969      *
1970      * @since 1.0.8
1971      */
1972     protected function build_parse_cache() {
1973         // check whether language_data is available
1974         if (empty($this->language_data)) {
1975             return false;
1976         }
1977 
1978         // cache symbol regexp
1979         //As this is a costy operation, we avoid doing it for multiple groups ...
1980         //Instead we perform it for all symbols at once.
1981         //
1982         //For this to work, we need to reorganize the data arrays.
1983         if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
1984             $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;
1985 
1986             $this->language_data['SYMBOL_DATA'] = array();
1987             $symbol_preg_multi = array(); // multi char symbols
1988             $symbol_preg_single = array(); // single char symbols
1989             foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
1990                 if (is_array($symbols)) {
1991                     foreach ($symbols as $sym) {
1992                         $sym = $this->hsc($sym);
1993                         if (!isset($this->language_data['SYMBOL_DATA'][$sym])) {
1994                             $this->language_data['SYMBOL_DATA'][$sym] = $key;
1995                             if (isset($sym[1])) { // multiple chars
1996                                 $symbol_preg_multi[] = preg_quote($sym, '/');
1997                             } else { // single char
1998                                 if ($sym == '-') {
1999                                     // don't trigger range out of order error
2000                                     $symbol_preg_single[] = '\-';
2001                                 } else {
2002                                     $symbol_preg_single[] = preg_quote($sym, '/');
2003                                 }
2004                             }
2005                         }
2006                     }
2007                 } else {
2008                     $symbols = $this->hsc($symbols);
2009                     if (!isset($this->language_data['SYMBOL_DATA'][$symbols])) {
2010                         $this->language_data['SYMBOL_DATA'][$symbols] = 0;
2011                         if (isset($symbols[1])) { // multiple chars
2012                             $symbol_preg_multi[] = preg_quote($symbols, '/');
2013                         } elseif ($symbols == '-') {
2014                             // don't trigger range out of order error
2015                             $symbol_preg_single[] = '\-';
2016                         } else { // single char
2017                             $symbol_preg_single[] = preg_quote($symbols, '/');
2018                         }
2019                     }
2020                 }
2021             }
2022 
2023             //Now we have an array with each possible symbol as the key and the style as the actual data.
2024             //This way we can set the correct style just the moment we highlight ...
2025             //
2026             //Now we need to rewrite our array to get a search string that
2027             $symbol_preg = array();
2028             if (!empty($symbol_preg_multi)) {
2029                 rsort($symbol_preg_multi);
2030                 $symbol_preg[] = implode('|', $symbol_preg_multi);
2031             }
2032             if (!empty($symbol_preg_single)) {
2033                 rsort($symbol_preg_single);
2034                 $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
2035             }
2036             $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
2037         }
2038 
2039         // cache optimized regexp for keyword matching
2040         // remove old cache
2041         $this->language_data['CACHED_KEYWORD_LISTS'] = array();
2042         foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
2043             if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
2044                     $this->lexic_permissions['KEYWORDS'][$key]) {
2045                 $this->optimize_keyword_group($key);
2046             }
2047         }
2048 
2049         // brackets
2050         if ($this->lexic_permissions['BRACKETS']) {
2051             $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');
2052             if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
2053                 $this->language_data['CACHE_BRACKET_REPLACE'] = array(
2054                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#91;|>',
2055                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#93;|>',
2056                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#40;|>',
2057                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#41;|>',
2058                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#123;|>',
2059                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#125;|>',
2060                 );
2061             }
2062             else {
2063                 $this->language_data['CACHE_BRACKET_REPLACE'] = array(
2064                     '<| class="br0">&#91;|>',
2065                     '<| class="br0">&#93;|>',
2066                     '<| class="br0">&#40;|>',
2067                     '<| class="br0">&#41;|>',
2068                     '<| class="br0">&#123;|>',
2069                     '<| class="br0">&#125;|>',
2070                 );
2071             }
2072         }
2073 
2074         //Build the parse cache needed to highlight numbers appropriate
2075         if($this->lexic_permissions['NUMBERS']) {
2076             //Check if the style rearrangements have been processed ...
2077             //This also does some preprocessing to check which style groups are useable ...
2078             if(!isset($this->language_data['NUMBERS_CACHE'])) {
2079                 $this->build_style_cache();
2080             }
2081 
2082             //Number format specification
2083             //All this formats are matched case-insensitively!
2084             static $numbers_format = array(
2085                 GESHI_NUMBER_INT_BASIC =>
2086                     '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2087                 GESHI_NUMBER_INT_CSTYLE =>
2088                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2089                 GESHI_NUMBER_BIN_SUFFIX =>
2090                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2091                 GESHI_NUMBER_BIN_PREFIX_PERCENT =>
2092                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2093                 GESHI_NUMBER_BIN_PREFIX_0B =>
2094                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2095                 GESHI_NUMBER_OCT_PREFIX =>
2096                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2097                 GESHI_NUMBER_OCT_PREFIX_0O =>
2098                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2099                 GESHI_NUMBER_OCT_PREFIX_AT =>
2100                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2101                 GESHI_NUMBER_OCT_SUFFIX =>
2102                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2103                 GESHI_NUMBER_HEX_PREFIX =>
2104                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2105                 GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
2106                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2107                 GESHI_NUMBER_HEX_SUFFIX =>
2108                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2109                 GESHI_NUMBER_FLT_NONSCI =>
2110                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2111                 GESHI_NUMBER_FLT_NONSCI_F =>
2112                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2113                 GESHI_NUMBER_FLT_SCI_SHORT =>
2114                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2115                 GESHI_NUMBER_FLT_SCI_ZERO =>
2116                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
2117                 );
2118 
2119             //At this step we have an associative array with flag groups for a
2120             //specific style or an string denoting a regexp given its index.
2121             $this->language_data['NUMBERS_RXCACHE'] = array();
2122             foreach($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
2123                 if(is_string($rxdata)) {
2124                     $regexp = $rxdata;
2125                 } else {
2126                     //This is a bitfield of number flags to highlight:
2127                     //Build an array, implode them together and make this the actual RX
2128                     $rxuse = array();
2129                     for($i = 1; $i <= $rxdata; $i<<=1) {
2130                         if($rxdata & $i) {
2131                             $rxuse[] = $numbers_format[$i];
2132                         }
2133                     }
2134                     $regexp = implode("|", $rxuse);
2135                 }
2136 
2137                 $this->language_data['NUMBERS_RXCACHE'][$key] =
2138                     "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i";
2139             }
2140 
2141             if(!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
2142                 $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
2143             }
2144         }
2145 
2146         $this->parse_cache_built = true;
2147     }
2148 
2149     /**
2150      * Returns the code in $this->source, highlighted and surrounded by the
     * necessary HTML.
2152      *
2153      * This should only be called ONCE, cos it's SLOW! If you want to highlight
2154      * the same source multiple times, you're better off doing a whole lot of
2155      * str_replaces to replace the &lt;span&gt;s
2156      *
2157      * @since 1.0.0
2158      */
2159     public function parse_code() {
2160         // Start the timer
2161         $start_time = microtime();
2162 
2163         // Replace all newlines to a common form.
2164         $code = str_replace("\r\n", "\n", $this->source);
2165         $code = str_replace("\r", "\n", $code);
2166 
2167         // check whether language_data is available
2168         if (empty($this->language_data)) {
2169             $this->error = GESHI_ERROR_NO_SUCH_LANG;
2170         }
2171 
2172         // Firstly, if there is an error, we won't highlight
2173         if ($this->error) {
2174             //Escape the source for output
2175             $result = $this->hsc($this->source);
2176 
2177             //This fix is related to SF#1923020, but has to be applied regardless of
2178             //actually highlighting symbols.
2179             $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2180 
2181             // Timing is irrelevant
2182             $this->set_time($start_time, $start_time);
2183             $this->finalise($result);
2184             return $result;
2185         }
2186 
2187         // make sure the parse cache is up2date
2188         if (!$this->parse_cache_built) {
2189             $this->build_parse_cache();
2190         }
2191 
2192         // Initialise various stuff
2193         $length           = strlen($code);
2194         $COMMENT_MATCHED  = false;
2195         $stuff_to_parse   = '';
2196         $endresult        = '';
2197 
2198         // "Important" selections are handled like multiline comments
2199         // @todo GET RID OF THIS SHIZ
2200         if ($this->enable_important_blocks) {
2201             $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2202         }
2203 
2204         if ($this->strict_mode) {
2205             // Break the source into bits. Each bit will be a portion of the code
2206             // within script delimiters - for example, HTML between < and >
2207             $k = 0;
2208             $parts = array();
2209             $matches = array();
2210             $next_match_pointer = null;
2211             // we use a copy to unset delimiters on demand (when they are not found)
2212             $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2213             $i = 0;
2214             while ($i < $length) {
2215                 $next_match_pos = $length + 1; // never true
2216                 foreach ($delim_copy as $dk => $delimiters) {
2217                     if(is_array($delimiters)) {
2218                         foreach ($delimiters as $open => $close) {
2219                             // make sure the cache is setup properly
2220                             if (!isset($matches[$dk][$open])) {
2221                                 $matches[$dk][$open] = array(
2222                                     'next_match' => -1,
2223                                     'dk' => $dk,
2224 
2225                                     'open' => $open, // needed for grouping of adjacent code blocks (see below)
2226                                     'open_strlen' => strlen($open),
2227 
2228                                     'close' => $close,
2229                                     'close_strlen' => strlen($close),
2230                                 );
2231                             }
2232                             // Get the next little bit for this opening string
2233                             if ($matches[$dk][$open]['next_match'] < $i) {
2234                                 // only find the next pos if it was not already cached
2235                                 $open_pos = strpos($code, $open, $i);
2236                                 if ($open_pos === false) {
2237                                     // no match for this delimiter ever
2238                                     unset($delim_copy[$dk][$open]);
2239                                     continue;
2240                                 }
2241                                 $matches[$dk][$open]['next_match'] = $open_pos;
2242                             }
2243                             if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2244                                 //So we got a new match, update the close_pos
2245                                 $matches[$dk][$open]['close_pos'] =
2246                                     strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2247 
2248                                 $next_match_pointer =& $matches[$dk][$open];
2249                                 $next_match_pos = $matches[$dk][$open]['next_match'];
2250                             }
2251                         }
2252                     } else {
2253                         //So we should match an RegExp as Strict Block ...
2254                         /**
2255                          * The value in $delimiters is expected to be an RegExp
2256                          * containing exactly 2 matching groups:
2257                          *  - Group 1 is the opener
2258                          *  - Group 2 is the closer
2259                          */
2260                         if(preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2261                             //We got a match ...
2262                             if(isset($matches_rx['start']) && isset($matches_rx['end']))
2263                             {
2264                                 $matches[$dk] = array(
2265                                     'next_match' => $matches_rx['start'][1],
2266                                     'dk' => $dk,
2267 
2268                                     'close_strlen' => strlen($matches_rx['end'][0]),
2269                                     'close_pos' => $matches_rx['end'][1],
2270                                     );
2271                             } else {
2272                                 $matches[$dk] = array(
2273                                     'next_match' => $matches_rx[1][1],
2274                                     'dk' => $dk,
2275 
2276                                     'close_strlen' => strlen($matches_rx[2][0]),
2277                                     'close_pos' => $matches_rx[2][1],
2278                                     );
2279                             }
2280                         } else {
2281                             // no match for this delimiter ever
2282                             unset($delim_copy[$dk]);
2283                             continue;
2284                         }
2285 
2286                         if ($matches[$dk]['next_match'] <= $next_match_pos) {
2287                             $next_match_pointer =& $matches[$dk];
2288                             $next_match_pos = $matches[$dk]['next_match'];
2289                         }
2290                     }
2291                 }
2292 
2293                 // non-highlightable text
2294                 $parts[$k] = array(
2295                     1 => substr($code, $i, $next_match_pos - $i)
2296                 );
2297                 ++$k;
2298 
2299                 if ($next_match_pos > $length) {
2300                     // out of bounds means no next match was found
2301                     break;
2302                 }
2303 
2304                 // highlightable code
2305                 $parts[$k][0] = $next_match_pointer['dk'];
2306 
2307                 //Only combine for non-rx script blocks
2308                 if(is_array($delim_copy[$next_match_pointer['dk']])) {
2309                     // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2310                     $i = $next_match_pos + $next_match_pointer['open_strlen'];
2311                     while (true) {
2312                         $close_pos = strpos($code, $next_match_pointer['close'], $i);
2313                         if ($close_pos == false) {
2314                             break;
2315                         }
2316                         $i = $close_pos + $next_match_pointer['close_strlen'];
2317                         if ($i == $length) {
2318                             break;
2319                         }
2320                         if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2321                             substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2322                             // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2323                             foreach ($matches as $submatches) {
2324                                 foreach ($submatches as $match) {
2325                                     if ($match['next_match'] == $i) {
2326                                         // a different block already matches here!
2327                                         break 3;
2328                                     }
2329                                 }
2330                             }
2331                         } else {
2332                             break;
2333                         }
2334                     }
2335                 } else {
2336                     $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2337                     $i = $close_pos;
2338                 }
2339 
2340                 if ($close_pos === false) {
2341                     // no closing delimiter found!
2342                     $parts[$k][1] = substr($code, $next_match_pos);
2343                     ++$k;
2344                     break;
2345                 } else {
2346                     $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2347                     ++$k;
2348                 }
2349             }
2350             unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2351             $num_parts = $k;
2352 
2353             if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2354                 // when we have only one part, we don't have anything to highlight at all.
2355                 // if we have a "maybe" strict language, this should be handled as highlightable code
2356                 $parts = array(
2357                     0 => array(
2358                         0 => '',
2359                         1 => ''
2360                     ),
2361                     1 => array(
2362                         0 => null,
2363                         1 => $parts[0][1]
2364                     )
2365                 );
2366                 $num_parts = 2;
2367             }
2368 
2369         } else {
2370             // Not strict mode - simply dump the source into
2371             // the array at index 1 (the first highlightable block)
2372             $parts = array(
2373                 0 => array(
2374                     0 => '',
2375                     1 => ''
2376                 ),
2377                 1 => array(
2378                     0 => null,
2379                     1 => $code
2380                 )
2381             );
2382             $num_parts = 2;
2383         }
2384 
2385         //Unset variables we won't need any longer
2386         unset($code);
2387 
2388         //Preload some repeatedly used values regarding hardquotes ...
2389         $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2390         $hq_strlen = strlen($hq);
2391 
2392         //Preload if line numbers are to be generated afterwards
2393         //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2394         $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2395             !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2396 
2397         //preload the escape char for faster checking ...
2398         $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2399 
2400         // this is used for single-line comments
2401         $sc_disallowed_before = "";
2402         $sc_disallowed_after = "";
2403 
2404         if (isset($this->language_data['PARSER_CONTROL'])) {
2405             if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2406                 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2407                     $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2408                 }
2409                 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2410                     $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2411                 }
2412             }
2413         }
2414 
2415         //Fix for SF#1932083: Multichar Quotemarks unsupported
2416         $is_string_starter = array();
2417         if ($this->lexic_permissions['STRINGS']) {
2418             foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2419                 if (!isset($is_string_starter[$quotemark[0]])) {
2420                     $is_string_starter[$quotemark[0]] = (string)$quotemark;
2421                 } elseif (is_string($is_string_starter[$quotemark[0]])) {
2422                     $is_string_starter[$quotemark[0]] = array(
2423                         $is_string_starter[$quotemark[0]],
2424                         $quotemark);
2425                 } else {
2426                     $is_string_starter[$quotemark[0]][] = $quotemark;
2427                 }
2428             }
2429         }
2430 
2431         // Now we go through each part. We know that even-indexed parts are
2432         // code that shouldn't be highlighted, and odd-indexed parts should
2433         // be highlighted
2434         for ($key = 0; $key < $num_parts; ++$key) {
2435             $STRICTATTRS = '';
2436 
2437             // Even-indexed parts are not highlighted: emit them HTML-escaped and move on
2438             if (!($key & 1)) {
2439                 // Else not a block to highlight
2440                 $endresult .= $this->hsc($parts[$key][1]);
2441                 unset($parts[$key]);
2442                 continue;
2443             }
2444 
2445             $result = '';
2446             $part = $parts[$key][1];
2447 
2448             $highlight_part = true;
2449             if ($this->strict_mode && !is_null($parts[$key][0])) {
2450                 // get the class key for this block of code
2451                 $script_key = $parts[$key][0];
2452                 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2453                 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2454                     $this->lexic_permissions['SCRIPT']) {
2455                     // Add a span element around the source to
2456                     // highlight the overall source block
2457                     if (!$this->use_classes &&
2458                         $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2459                         $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2460                     } else {
2461                         $attributes = ' class="sc' . $script_key . '"';
2462                     }
2463                     $result .= "<span$attributes>";
2464                     $STRICTATTRS = $attributes;
2465                 }
2466             }
2467 
2468             if ($highlight_part) {
2469                 // Now, highlight the code in this block. This code
2470                 // is really the engine of GeSHi (along with the method
2471                 // parse_non_string_part).
2472 
2473                 // cache comment regexps incrementally
2474                 $next_comment_regexp_key = '';
2475                 $next_comment_regexp_pos = -1;
2476                 $next_comment_multi_pos = -1;
2477                 $next_comment_single_pos = -1;
2478                 $comment_regexp_cache_per_key = array();
2479                 $comment_multi_cache_per_key = array();
2480                 $comment_single_cache_per_key = array();
2481                 $next_open_comment_multi = '';
2482                 $next_comment_single_key = '';
2483                 $escape_regexp_cache_per_key = array();
2484                 $next_escape_regexp_key = '';
2485                 $next_escape_regexp_pos = -1;
2486 
2487                 $length = strlen($part);
2488                 for ($i = 0; $i < $length; ++$i) {
2489                     // Get the next char
2490                     $char = $part[$i];
2491                     $char_len = 1;
2492 
2493                     // update regexp comment cache if needed
2494                     if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2495                         $next_comment_regexp_pos = $length;
2496                         foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2497                             $match_i = false;
2498                             if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2499                                 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2500                                  $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2501                                 // we have already matched something
2502                                 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2503                                     // this comment is never matched
2504                                     continue;
2505                                 }
2506                                 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2507                             } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i)) {
2508                                 $match_i = $match[0][1];
2509 
2510                                 $comment_regexp_cache_per_key[$comment_key] = array(
2511                                     'key' => $comment_key,
2512                                     'length' => strlen($match[0][0]),
2513                                     'pos' => $match_i
2514                                 );
2515                             } else {
2516                                 $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2517                                 continue;
2518                             }
2519 
2520                             if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2521                                 $next_comment_regexp_pos = $match_i;
2522                                 $next_comment_regexp_key = $comment_key;
2523                                 if ($match_i === $i) {
2524                                     break;
2525                                 }
2526                             }
2527                         }
2528                     }
2529 
2530                     $string_started = false;
2531 
2532                     if (isset($is_string_starter[$char])) {
2533                         // Possibly the start of a new string ...
2534 
2535                         //Check which starter it was ...
2536                         //Fix for SF#1932083: Multichar Quotemarks unsupported
2537                         if (is_array($is_string_starter[$char])) {
2538                             $char_new = '';
2539                             foreach ($is_string_starter[$char] as $testchar) {
2540                                 if ($testchar === substr($part, $i, strlen($testchar)) &&
2541                                     strlen($testchar) > strlen($char_new)) {
2542                                     $char_new = $testchar;
2543                                     $string_started = true;
2544                                 }
2545                             }
2546                             if ($string_started) {
2547                                 $char = $char_new;
2548                             }
2549                         } else {
2550                             $testchar = $is_string_starter[$char];
2551                             if ($testchar === substr($part, $i, strlen($testchar))) {
2552                                 $char = $testchar;
2553                                 $string_started = true;
2554                             }
2555                         }
2556                         $char_len = strlen($char);
2557                     }
2558 
2559                     if ($string_started && ($i != $next_comment_regexp_pos)) {
2560                         // Hand out the correct style information for this string
2561                         $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2562                         if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2563                             !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2564                             $string_key = 0;
2565                         }
2566 
2567                         // parse the stuff before this
2568                         $result .= $this->parse_non_string_part($stuff_to_parse);
2569                         $stuff_to_parse = '';
2570 
2571                         if (!$this->use_classes) {
2572                             $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2573                         } else {
2574                             $string_attributes = ' class="st'.$string_key.'"';
2575                         }
2576 
2577                         // now handle the string
2578                         $string = "<span$string_attributes>" . GeSHi::hsc($char);
2579                         $start = $i + $char_len;
2580                         $string_open = true;
2581 
2582                         if(empty($this->language_data['ESCAPE_REGEXP'])) {
2583                             $next_escape_regexp_pos = $length;
2584                         }
2585 
2586                         do {
2587                             //Get the regular ending pos ...
2588                             $close_pos = strpos($part, $char, $start);
2589                             if(false === $close_pos) {
2590                                 $close_pos = $length;
2591                             }
2592 
2593                             if($this->lexic_permissions['ESCAPE_CHAR']) {
2594                                 // update escape regexp cache if needed
2595                                 if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2596                                     $next_escape_regexp_pos = $length;
2597                                     foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2598                                         $match_i = false;
2599                                         if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2600                                             ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2601                                              $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2602                                             // we have already matched something
2603                                             if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2604                                                 // this escape regexp is never matched
2605                                                 continue;
2606                                             }
2607                                             $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2608                                         } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start)) {
2609                                             $match_i = $match[0][1];
2610 
2611                                             $escape_regexp_cache_per_key[$escape_key] = array(
2612                                                 'key' => $escape_key,
2613                                                 'length' => strlen($match[0][0]),
2614                                                 'pos' => $match_i
2615                                             );
2616                                         } else {
2617                                             $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2618                                             continue;
2619                                         }
2620 
2621                                         if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2622                                             $next_escape_regexp_pos = $match_i;
2623                                             $next_escape_regexp_key = $escape_key;
2624                                             if ($match_i === $start) {
2625                                                 break;
2626                                             }
2627                                         }
2628                                     }
2629                                 }
2630 
2631                                 //Find the next simple escape position
2632                                 if('' != $this->language_data['ESCAPE_CHAR']) {
2633                                     $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2634                                     if(false === $simple_escape) {
2635                                         $simple_escape = $length;
2636                                     }
2637                                 } else {
2638                                     $simple_escape = $length;
2639                                 }
2640                             } else {
2641                                 $next_escape_regexp_pos = $length;
2642                                 $simple_escape = $length;
2643                             }
2644 
2645                             if($simple_escape < $next_escape_regexp_pos &&
2646                                 $simple_escape < $length &&
2647                                 $simple_escape < $close_pos) {
2648                                 //The next escape sequence is a simple one ...
2649                                 $es_pos = $simple_escape;
2650 
2651                                 //Add the stuff not in the string yet ...
2652                                 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2653 
2654                                 //Get the style for this escaped char ...
2655                                 if (!$this->use_classes) {
2656                                     $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2657                                 } else {
2658                                     $escape_char_attributes = ' class="es0"';
2659                                 }
2660 
2661                                 //Add the style for the escape char ...
2662                                 $string .= "<span$escape_char_attributes>" .
2663                                     GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2664 
2665                                 //Get the byte AFTER the ESCAPE_CHAR we just found
2666                                 $es_char = $part[$es_pos + 1];
2667                                 if ($es_char == "\n") {
2668                                     // don't put a span around newlines
2669                                     $string .= "</span>\n";
2670                                     $start = $es_pos + 2;
2671                                 } elseif (ord($es_char) >= 128) {
2672                                     //This is an non-ASCII char (UTF8 or single byte)
2673                                     //This code tries to work around SF#2037598 ...
2674                                     if(function_exists('mb_substr')) {
2675                                         $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2676                                         $string .= $es_char_m . '</span>';
2677                                     } elseif ('utf-8' == $this->encoding) {
2678                                         if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2679                                             "|\xE0[\xA0-\xBF][\x80-\xBF]".
2680                                             "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2681                                             "|\xED[\x80-\x9F][\x80-\xBF]".
2682                                             "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2683                                             "|[\xF1-\xF3][\x80-\xBF]{3}".
2684                                             "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2685                                             $part, $es_char_m, null, $es_pos + 1)) {
2686                                             $es_char_m = $es_char_m[0];
2687                                         } else {
2688                                             $es_char_m = $es_char;
2689                                         }
2690                                         $string .= $this->hsc($es_char_m) . '</span>';
2691                                     } else {
2692                                         $es_char_m = $this->hsc($es_char);
2693                                     }
2694                                     $start = $es_pos + strlen($es_char_m) + 1;
2695                                 } else {
2696                                     $string .= $this->hsc($es_char) . '</span>';
2697                                     $start = $es_pos + 2;
2698                                 }
2699                             } elseif ($next_escape_regexp_pos < $length &&
2700                                 $next_escape_regexp_pos < $close_pos) {
2701                                 $es_pos = $next_escape_regexp_pos;
2702                                 //Add the stuff not in the string yet ...
2703                                 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2704 
2705                                 //Get the key and length of this match ...
2706                                 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2707                                 $escape_str = substr($part, $es_pos, $escape['length']);
2708                                 $escape_key = $escape['key'];
2709 
2710                                 //Get the style for this escaped char ...
2711                                 if (!$this->use_classes) {
2712                                     $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2713                                 } else {
2714                                     $escape_char_attributes = ' class="es' . $escape_key . '"';
2715                                 }
2716 
2717                                 //Add the style for the escape char ...
2718                                 $string .= "<span$escape_char_attributes>" .
2719                                     $this->hsc($escape_str) . '</span>';
2720 
2721                                 $start = $es_pos + $escape['length'];
2722                             } else {
2723                                 //Copy the remainder of the string ...
2724                                 $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2725                                 $start = $close_pos + $char_len;
2726                                 $string_open = false;
2727                             }
2728                         } while($string_open);
2729 
2730                         if ($check_linenumbers) {
2731                             // Are line numbers used? If so, we should end the string before
2732                             // the newline and begin it again (so when <li>s are put in the source
2733                             // remains XHTML compliant)
2734                             // note to self: This opens up possibility of config files specifying
2735                             // that languages can/cannot have multiline strings???
2736                             $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2737                         }
2738 
2739                         $result .= $string;
2740                         $string = '';
2741                         $i = $start - 1;
2742                         continue;
2743                     } elseif ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2744                         substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2745                         // The start of a hard quoted string
2746                         if (!$this->use_classes) {
2747                             $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2748                             $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2749                         } else {
2750                             $string_attributes = ' class="st_h"';
2751                             $escape_char_attributes = ' class="es_h"';
2752                         }
2753                         // parse the stuff before this
2754                         $result .= $this->parse_non_string_part($stuff_to_parse);
2755                         $stuff_to_parse = '';
2756 
2757                         // now handle the string
2758                         $string = '';
2759 
2760                         // look for closing quote
2761                         $start = $i + $hq_strlen;
2762                         while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2763                             $start = $close_pos + 1;
2764                             if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
2765                                 (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape
2766                                 // make sure this quote is not escaped
2767                                 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2768                                     if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2769                                         // check whether this quote is escaped or if it is something like '\\'
2770                                         $escape_char_pos = $close_pos - 1;
2771                                         while ($escape_char_pos > 0
2772                                                 && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2773                                             --$escape_char_pos;
2774                                         }
2775                                         if (($close_pos - $escape_char_pos) & 1) {
2776                                             // uneven number of escape chars => this quote is escaped
2777                                             continue 2;
2778                                         }
2779                                     }
2780                                 }
2781                             }
2782 
2783                             // found closing quote
2784                             break;
2785                         }
2786 
2787                         //Found the closing delimiter?
2788                         if (!$close_pos) {
2789                             // span till the end of this $part when no closing delimiter is found
2790                             $close_pos = $length;
2791                         }
2792 
2793                         //Get the actual string
2794                         $string = substr($part, $i, $close_pos - $i + 1);
2795                         $i = $close_pos;
2796 
2797                         // handle escape chars and encode html chars
2798                         // (special because when we have escape chars within our string they may not be escaped)
2799                         if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2800                             $start = 0;
2801                             $new_string = '';
2802                             while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2803                                 // html-escape the stuff before
2804                                 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2805                                 // check if this is a hard escape
2806                                 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2807                                     if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2808                                         // indeed, this is a hardescape
2809                                         $new_string .= "<span$escape_char_attributes>" .
2810                                             $this->hsc($hardescape) . '</span>';
2811                                         $start = $es_pos + strlen($hardescape);
2812                                         continue 2;
2813                                     }
2814                                 }
2815                                 // not a hard escape, but a normal escape
2816                                 // they come in pairs of two
2817                                 $c = 0;
2818                                 while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2819                                     && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2820                                     && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2821                                     $c += 2;
2822                                 }
2823                                 if ($c) {
2824                                     $new_string .= "<span$escape_char_attributes>" .
2825                                         str_repeat($escaped_escape_char, $c) .
2826                                         '</span>';
2827                                     $start = $es_pos + $c;
2828                                 } else {
2829                                     // this is just a single lonely escape char...
2830                                     $new_string .= $escaped_escape_char;
2831                                     $start = $es_pos + 1;
2832                                 }
2833                             }
2834                             $string = $new_string . $this->hsc(substr($string, $start));
2835                         } else {
2836                             $string = $this->hsc($string);
2837                         }
2838 
2839                         if ($check_linenumbers) {
2840                             // Are line numbers used? If, we should end the string before
2841                             // the newline and begin it again (so when <li>s are put in the source
2842                             // remains XHTML compliant)
2843                             // note to self: This opens up possibility of config files specifying
2844                             // that languages can/cannot have multiline strings???
2845                             $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2846                         }
2847 
2848                         $result .= "<span$string_attributes>" . $string . '</span>';
2849                         $string = '';
2850                         continue;
2851                     } else {
2852                         //Have a look for regexp comments
2853                         if ($i == $next_comment_regexp_pos) {
2854                             $COMMENT_MATCHED = true;
2855                             $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2856                             $test_str = $this->hsc(substr($part, $i, $comment['length']));
2857 
2858                             //@todo If remove important do remove here
2859                             if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2860                                 if (!$this->use_classes) {
2861                                     $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2862                                 } else {
2863                                     $attributes = ' class="co' . $comment['key'] . '"';
2864                                 }
2865 
2866                                 $test_str = "<span$attributes>" . $test_str . "</span>";
2867 
2868                                 // Short-cut through all the multiline code
2869                                 if ($check_linenumbers) {
2870                                     // strreplace to put close span and open span around multiline newlines
2871                                     $test_str = str_replace(
2872                                         "\n", "</span>\n<span$attributes>",
2873                                         str_replace("\n ", "\n&nbsp;", $test_str)
2874                                     );
2875                                 }
2876                             }
2877 
2878                             $i += $comment['length'] - 1;
2879 
2880                             // parse the rest
2881                             $result .= $this->parse_non_string_part($stuff_to_parse);
2882                             $stuff_to_parse = '';
2883                         }
2884 
2885                         // If we haven't matched a regexp comment, try multi-line comments
2886                         if (!$COMMENT_MATCHED) {
2887                             // Is this a multiline comment?
2888                             if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2889                                 $next_comment_multi_pos = $length;
2890                                 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2891                                     $match_i = false;
2892                                     if (isset($comment_multi_cache_per_key[$open]) &&
2893                                         ($comment_multi_cache_per_key[$open] >= $i ||
2894                                          $comment_multi_cache_per_key[$open] === false)) {
2895                                         // we have already matched something
2896                                         if ($comment_multi_cache_per_key[$open] === false) {
2897                                             // this comment is never matched
2898                                             continue;
2899                                         }
2900                                         $match_i = $comment_multi_cache_per_key[$open];
2901                                     } elseif (($match_i = stripos($part, $open, $i)) !== false) {
2902                                         $comment_multi_cache_per_key[$open] = $match_i;
2903                                     } else {
2904                                         $comment_multi_cache_per_key[$open] = false;
2905                                         continue;
2906                                     }
2907                                     if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2908                                         $next_comment_multi_pos = $match_i;
2909                                         $next_open_comment_multi = $open;
2910                                         if ($match_i === $i) {
2911                                             break;
2912                                         }
2913                                     }
2914                                 }
2915                             }
2916                             if ($i == $next_comment_multi_pos) {
2917                                 $open = $next_open_comment_multi;
2918                                 $close = $this->language_data['COMMENT_MULTI'][$open];
2919                                 $open_strlen = strlen($open);
2920                                 $close_strlen = strlen($close);
2921                                 $COMMENT_MATCHED = true;
2922                                 $test_str_match = $open;
2923                                 //@todo If remove important do remove here
2924                                 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2925                                     $open == GESHI_START_IMPORTANT) {
2926                                     if ($open != GESHI_START_IMPORTANT) {
2927                                         if (!$this->use_classes) {
2928                                             $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2929                                         } else {
2930                                             $attributes = ' class="coMULTI"';
2931                                         }
2932                                         $test_str = "<span$attributes>" . $this->hsc($open);
2933                                     } else {
2934                                         if (!$this->use_classes) {
2935                                             $attributes = ' style="' . $this->important_styles . '"';
2936                                         } else {
2937                                             $attributes = ' class="imp"';
2938                                         }
2939 
2940                                         // We don't include the start of the comment if it's an
2941                                         // "important" part
2942                                         $test_str = "<span$attributes>";
2943                                     }
2944                                 } else {
2945                                     $test_str = $this->hsc($open);
2946                                 }
2947 
2948                                 $close_pos = strpos( $part, $close, $i + $open_strlen );
2949 
2950                                 if ($close_pos === false) {
2951                                     $close_pos = $length;
2952                                 }
2953 
2954                                 // Short-cut through all the multiline code
2955                                 $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2956                                 if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
2957                                     $test_str_match == GESHI_START_IMPORTANT) &&
2958                                     $check_linenumbers) {
2959 
2960                                     // strreplace to put close span and open span around multiline newlines
2961                                     $test_str .= str_replace(
2962                                         "\n", "</span>\n<span$attributes>",
2963                                         str_replace("\n ", "\n&nbsp;", $rest_of_comment)
2964                                     );
2965                                 } else {
2966                                     $test_str .= $rest_of_comment;
2967                                 }
2968 
2969                                 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2970                                     $test_str_match == GESHI_START_IMPORTANT) {
2971                                     $test_str .= '</span>';
2972                                 }
2973 
2974                                 $i = $close_pos + $close_strlen - 1;
2975 
2976                                 // parse the rest
2977                                 $result .= $this->parse_non_string_part($stuff_to_parse);
2978                                 $stuff_to_parse = '';
2979                             }
2980                         }
2981 
2982                         // If we haven't matched a multiline comment, try single-line comments
2983                         if (!$COMMENT_MATCHED) {
2984                             // cache potential single line comment occurances
2985                             if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2986                                 $next_comment_single_pos = $length;
2987                                 foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2988                                     $match_i = false;
2989                                     if (isset($comment_single_cache_per_key[$comment_key]) &&
2990                                         ($comment_single_cache_per_key[$comment_key] >= $i ||
2991                                          $comment_single_cache_per_key[$comment_key] === false)) {
2992                                         // we have already matched something
2993                                         if ($comment_single_cache_per_key[$comment_key] === false) {
2994                                             // this comment is never matched
2995                                             continue;
2996                                         }
2997                                         $match_i = $comment_single_cache_per_key[$comment_key];
2998                                     } elseif (
2999                                         // case sensitive comments
3000                                         ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3001                                         ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
3002                                         // non case sensitive
3003                                         (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3004                                           (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
3005                                         $comment_single_cache_per_key[$comment_key] = $match_i;
3006                                     } else {
3007                                         $comment_single_cache_per_key[$comment_key] = false;
3008                                         continue;
3009                                     }
3010                                     if ($match_i !== false && $match_i < $next_comment_single_pos) {
3011                                         $next_comment_single_pos = $match_i;
3012                                         $next_comment_single_key = $comment_key;
3013                                         if ($match_i === $i) {
3014                                             break;
3015                                         }
3016                                     }
3017                                 }
3018                             }
3019                             if ($next_comment_single_pos == $i) {
3020                                 $comment_key = $next_comment_single_key;
3021                                 $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
3022                                 $com_len = strlen($comment_mark);
3023 
3024                                 // This check will find special variables like $# in bash
3025                                 // or compiler directives of Delphi beginning {$
3026                                 if ((empty($sc_disallowed_before) || ($i == 0) ||
3027                                     (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
3028                                     (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
3029                                     (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
3030                                 {
3031                                     // this is a valid comment
3032                                     $COMMENT_MATCHED = true;
3033                                     if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3034                                         if (!$this->use_classes) {
3035                                             $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
3036                                         } else {
3037                                             $attributes = ' class="co' . $comment_key . '"';
3038                                         }
3039                                         $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
3040                                     } else {
3041                                         $test_str = $this->hsc($comment_mark);
3042                                     }
3043 
3044                                     //Check if this comment is the last in the source
3045                                     $close_pos = strpos($part, "\n", $i);
3046                                     $oops = false;
3047                                     if ($close_pos === false) {
3048                                         $close_pos = $length;
3049                                         $oops = true;
3050                                     }
3051                                     $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
3052                                     if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3053                                         $test_str .= "</span>";
3054                                     }
3055 
3056                                     // Take into account that the comment might be the last in the source
3057                                     if (!$oops) {
3058                                       $test_str .= "\n";
3059                                     }
3060 
3061                                     $i = $close_pos;
3062 
3063                                     // parse the rest
3064                                     $result .= $this->parse_non_string_part($stuff_to_parse);
3065                                     $stuff_to_parse = '';
3066                                 }
3067                             }
3068                         }
3069                     }
3070 
3071                     // Where are we adding this char?
3072                     if (!$COMMENT_MATCHED) {
3073                         $stuff_to_parse .= $char;
3074                     } else {
3075                         $result .= $test_str;
3076                         unset($test_str);
3077                         $COMMENT_MATCHED = false;
3078                     }
3079                 }
3080                 // Parse the last bit
3081                 $result .= $this->parse_non_string_part($stuff_to_parse);
3082                 $stuff_to_parse = '';
3083             } else {
3084                 $result .= $this->hsc($part);
3085             }
3086             // Close the <span> that surrounds the block
3087             if ($STRICTATTRS != '') {
3088                 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
3089                 $result .= '</span>';
3090             }
3091 
3092             $endresult .= $result;
3093             unset($part, $parts[$key], $result);
3094         }
3095 
3096         //This fix is related to SF#1923020, but has to be applied regardless of
3097         //actually highlighting symbols.
3098         /** NOTE: memorypeak #3 */
3099         $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
3100 
3101 //        // Parse the last stuff (redundant?)
3102 //        $result .= $this->parse_non_string_part($stuff_to_parse);
3103 
3104         // Lop off the very first and last spaces
3105 //        $result = substr($result, 1, -1);
3106 
3107         // We're finished: stop timing
3108         $this->set_time($start_time, microtime());
3109 
3110         $this->finalise($endresult);
3111         return $endresult;
3112     }
3113 
3114     /**
3115      * Swaps out spaces and tabs for HTML indentation. Not needed if
3116      * the code is in a pre block...
3117      *
3118      * @param  string $result The source to indent (reference!)
3119      * @since  1.0.0
3120      */
3121     protected function indent(&$result) {
3122         /// Replace tabs with the correct number of spaces
3123         if (false !== strpos($result, "\t")) {
3124             $lines = explode("\n", $result);
3125             $result = null;//Save memory while we process the lines individually
3126             $tab_width = $this->get_real_tab_width();
3127             $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);
3128 
3129             for ($key = 0, $n = count($lines); $key < $n; $key++) {
3130                 $line = $lines[$key];
3131                 if (false === strpos($line, "\t")) {
3132                     continue;
3133                 }
3134 
3135                 $pos = 0;
3136                 $length = strlen($line);
3137                 $lines[$key] = ''; // reduce memory
3138 
3139                 $IN_TAG = false;
3140                 for ($i = 0; $i < $length; ++$i) {
3141                     $char = $line[$i];
3142                     // Simple engine to work out whether we're in a tag.
3143                     // If we are we modify $pos. This is so we ignore HTML
3144                     // in the line and only workout the tab replacement
3145                     // via the actual content of the string
3146                     // This test could be improved to include strings in the
3147                     // html so that < or > would be allowed in user's styles
3148                     // (e.g. quotes: '<' '>'; or similar)
3149                     if ($IN_TAG) {
3150                         if ('>' == $char) {
3151                             $IN_TAG = false;
3152                         }
3153                         $lines[$key] .= $char;
3154                     } elseif ('<' == $char) {
3155                         $IN_TAG = true;
3156                         $lines[$key] .= '<';
3157                     } elseif ('&' == $char) {
3158                         $substr = substr($line, $i + 3, 5);
3159                         $posi = strpos($substr, ';');
3160                         if (false === $posi) {
3161                             ++$pos;
3162                         } else {
3163                             $pos -= $posi+2;
3164                         }
3165                         $lines[$key] .= $char;
3166                     } elseif ("\t" == $char) {
3167                         $str = '';
3168                         // OPTIMISE - move $strs out. Make an array:
3169                         // $tabs = array(
3170                         //  1 => '&nbsp;',
3171                         //  2 => '&nbsp; ',
3172                         //  3 => '&nbsp; &nbsp;' etc etc
3173                         // to use instead of building a string every time
3174                         $tab_end_width = $tab_width - ($pos % $tab_width); //Moved out of the look as it doesn't change within the loop
3175                         if (($pos & 1) || 1 == $tab_end_width) {
3176                             $str .= substr($tab_string, 6, $tab_end_width);
3177                         } else {
3178                             $str .= substr($tab_string, 0, $tab_end_width+5);
3179                         }
3180                         $lines[$key] .= $str;
3181                         $pos += $tab_end_width;
3182 
3183                         if (false === strpos($line, "\t", $i + 1)) {
3184                             $lines[$key] .= substr($line, $i + 1);
3185                             break;
3186                         }
3187                     } elseif (0 == $pos && ' ' == $char) {
3188                         $lines[$key] .= '&nbsp;';
3189                         ++$pos;
3190                     } else {
3191                         $lines[$key] .= $char;
3192                         ++$pos;
3193                     }
3194                 }
3195             }
3196             $result = implode("\n", $lines);
3197             unset($lines);//We don't need the lines separated beyond this --- free them!
3198         }
3199         // Other whitespace
3200         // BenBE: Fix to reduce the number of replacements to be done
3201         $result = preg_replace('/^ /m', '&nbsp;', $result);
3202         $result = str_replace('  ', ' &nbsp;', $result);
3203 
3204         if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
3205             if ($this->line_ending === null) {
3206                 $result = nl2br($result);
3207             } else {
3208                 $result = str_replace("\n", $this->line_ending, $result);
3209             }
3210         }
3211     }
3212 
3213     /**
3214      * Changes the case of a keyword for those languages where a change is asked for
3215      *
3216      * @param  string $instr The keyword to change the case of
3217      * @return string The keyword with its case changed
3218      * @since  1.0.0
3219      */
3220     protected function change_case($instr) {
3221         switch ($this->language_data['CASE_KEYWORDS']) {
3222             case GESHI_CAPS_UPPER:
3223                 return strtoupper($instr);
3224             case GESHI_CAPS_LOWER:
3225                 return strtolower($instr);
3226             default:
3227                 return $instr;
3228         }
3229     }
3230 
3231     /**
3232      * Handles replacements of keywords to include markup and links if requested
3233      *
3234      * @param  string $match The keyword to add the Markup to
3235      * @return string The HTML for the match found
3236      * @since  1.0.8
3237      *
3238      * @todo   Get rid of ender in keyword links
3239      */
3240     protected function handle_keyword_replace($match) {
3241         $k = $this->_kw_replace_group;
3242         $keyword = $match[0];
3243         $keyword_match = $match[1];
3244 
3245         $before = '';
3246         $after = '';
3247 
3248         if ($this->keyword_links) {
3249             // Keyword links have been ebabled
3250 
3251             if (isset($this->language_data['URLS'][$k]) &&
3252                 $this->language_data['URLS'][$k] != '') {
3253                 // There is a base group for this keyword
3254 
3255                 // Old system: strtolower
3256                 //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
3257                 // New system: get keyword from language file to get correct case
3258                 if (!$this->language_data['CASE_SENSITIVE'][$k] &&
3259                     strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
3260                     foreach ($this->language_data['KEYWORDS'][$k] as $word) {
3261                         if (strcasecmp($word, $keyword_match) == 0) {
3262                             break;
3263                         }
3264                     }
3265                 } else {
3266                     $word = $keyword_match;
3267                 }
3268 
3269                 $before = '<|UR1|"' .
3270                     str_replace(
3271                         array(
3272                             '{FNAME}',
3273                             '{FNAMEL}',
3274                             '{FNAMEU}',
3275                             '{FNAMEUF}',
3276                             '.'),
3277                         array(
3278                             str_replace('+', '%20', urlencode($this->hsc($word))),
3279                             str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
3280                             str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
3281                             str_replace('+', '%20', urlencode($this->hsc(ucfirst($word)))),
3282                             '<DOT>'),
3283                         $this->language_data['URLS'][$k]
3284                     ) . '">';
3285                 $after = '</a>';
3286             }
3287         }
3288 
3289         return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
3290     }
3291 
3292     /**
3293      * handles regular expressions highlighting-definitions with callback functions
3294      *
3295      * @note this is a callback, don't use it directly
3296      *
3297      * @param array $matches the matches array
3298      * @return string The highlighted string
3299      * @since 1.0.8
3300      */
3301     protected function handle_regexps_callback($matches) {
3302         // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
3303         return  ' style="' . call_user_func($this->language_data['STYLES']['REGEXPS'][$this->_rx_key], $matches[1]) . '"'. $matches[1] . '|>';
3304     }
3305 
3306     /**
3307      * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
3308      *
3309      * @note this is a callback, don't use it directly
3310      *
3311      * @param array $matches the matches array
3312      * @return string
3313      * @since 1.0.8
3314      */
3315     protected function handle_multiline_regexps($matches) {
3316         $before = $this->_hmr_before;
3317         $after = $this->_hmr_after;
3318         if ($this->_hmr_replace) {
3319             $replace = $this->_hmr_replace;
3320             $search = array();
3321 
3322             foreach (array_keys($matches) as $k) {
3323                 $search[] = '\\' . $k;
3324             }
3325 
3326             $before = str_replace($search, $matches, $before);
3327             $after = str_replace($search, $matches, $after);
3328             $replace = str_replace($search, $matches, $replace);
3329         } else {
3330             $replace = $matches[0];
3331         }
3332         return $before
3333                     . '<|!REG3XP' . $this->_hmr_key .'!>'
3334                         . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
3335                     . '|>'
3336               . $after;
3337     }
3338 
3339     /**
3340      * Takes a string that has no strings or comments in it, and highlights
3341      * stuff like keywords, numbers and methods.
3342      *
3343      * @param string $stuff_to_parse The string to parse for keyword, numbers etc.
3344      * @since 1.0.0
3345      * @todo BUGGY! Why? Why not build string and return?
3346      * @return string
3347      */
3348     protected function parse_non_string_part($stuff_to_parse) {
3349         $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
3350 
3351         // Highlight keywords
3352         $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#|^&";
3353         $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
3354         if ($this->lexic_permissions['STRINGS']) {
3355             $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
3356             $disallowed_before .= $quotemarks;
3357             $disallowed_after .= $quotemarks;
3358         }
3359         $disallowed_before .= "])";
3360         $disallowed_after .= "])";
3361 
3362         $parser_control_pergroup = false;
3363         if (isset($this->language_data['PARSER_CONTROL'])) {
3364             if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
3365                 $x = 0; // check wether per-keyword-group parser_control is enabled
3366                 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
3367                     $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
3368                     ++$x;
3369                 }
3370                 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
3371                     $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
3372                     ++$x;
3373                 }
3374                 $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
3375             }
3376         }
3377 
3378         foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3379             if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
3380                 $this->lexic_permissions['KEYWORDS'][$k]) {
3381 
3382                 $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
3383                 $modifiers = $case_sensitive ? '' : 'i';
3384 
3385                 // NEW in 1.0.8 - per-keyword-group parser control
3386                 $disallowed_before_local = $disallowed_before;
3387                 $disallowed_after_local = $disallowed_after;
3388                 if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
3389                     if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
3390                         $disallowed_before_local =
3391                             $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
3392                     }
3393 
3394                     if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
3395                         $disallowed_after_local =
3396                             $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
3397                     }
3398                 }
3399 
3400                 $this->_kw_replace_group = $k;
3401 
3402                 //NEW in 1.0.8, the cached regexp list
3403                 // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
3404                 for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set <  $set_length; ++$set) {
3405                     $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
3406                     // Might make a more unique string for putting the number in soon
3407                     // Basically, we don't put the styles in yet because then the styles themselves will
3408                     // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
3409                     $stuff_to_parse = preg_replace_callback(
3410                         "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php|aspx?))$disallowed_after_local/$modifiers",
3411                         array($this, 'handle_keyword_replace'),
3412                         $stuff_to_parse
3413                         );
3414                 }
3415             }
3416         }
3417 
3418         // Regular expressions
3419         foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
3420             if ($this->lexic_permissions['REGEXPS'][$key]) {
3421                 if (is_array($regexp)) {
3422                     if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3423                         // produce valid HTML when we match multiple lines
3424                         $this->_hmr_replace = $regexp[GESHI_REPLACE];
3425                         $this->_hmr_before = $regexp[GESHI_BEFORE];
3426                         $this->_hmr_key = $key;
3427                         $this->_hmr_after = $regexp[GESHI_AFTER];
3428                         $stuff_to_parse = preg_replace_callback(
3429                             "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
3430                             array($this, 'handle_multiline_regexps'),
3431                             $stuff_to_parse);
3432                         $this->_hmr_replace = false;
3433                         $this->_hmr_before = '';
3434                         $this->_hmr_after = '';
3435                     } else {
3436                         $stuff_to_parse = preg_replace(
3437                             '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
3438                             $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
3439                             $stuff_to_parse);
3440                     }
3441                 } else {
3442                     if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3443                         // produce valid HTML when we match multiple lines
3444                         $this->_hmr_key = $key;
3445                         $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
3446                                               array($this, 'handle_multiline_regexps'), $stuff_to_parse);
3447                         $this->_hmr_key = '';
3448                     } else {
3449                         $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
3450                     }
3451                 }
3452             }
3453         }
3454 
3455         // Highlight numbers. As of 1.0.8 we support different types of numbers
3456         $numbers_found = false;
3457 
3458         if ($this->lexic_permissions['NUMBERS'] && preg_match($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'], $stuff_to_parse )) {
3459             $numbers_found = true;
3460 
3461             //For each of the formats ...
3462             foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3463                 //Check if it should be highlighted ...
3464                 $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
3465             }
3466         }
3467 
3468         //
3469         // Now that's all done, replace /[number]/ with the correct styles
3470         //
3471         foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3472             if (!$this->use_classes) {
3473                 $attributes = ' style="' .
3474                     (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
3475                     $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
3476             } else {
3477                 $attributes = ' class="kw' . $k . '"';
3478             }
3479             $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
3480         }
3481 
3482         if ($numbers_found) {
3483             // Put number styles in
3484             foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3485                 //Commented out for now, as this needs some review ...
3486                 //                if ($numbers_permissions & $id) {
3487                 //Get the appropriate style ...
3488                 //Checking for unset styles is done by the style cache builder ...
3489                 if (!$this->use_classes) {
3490                     $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
3491                 } else {
3492                     $attributes = ' class="nu'.$id.'"';
3493                 }
3494 
3495                 //Set in the correct styles ...
3496                 $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
3497                 //                }
3498             }
3499         }
3500 
3501         // Highlight methods and fields in objects
3502         if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
3503             $oolang_spaces = "[\s]*";
3504             $oolang_before = "";
3505             $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
3506             if (isset($this->language_data['PARSER_CONTROL'])) {
3507                 if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
3508                     if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
3509                         $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
3510                     }
3511                     if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
3512                         $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
3513                     }
3514                     if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
3515                         $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
3516                     }
3517                 }
3518             }
3519 
3520             foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
3521                 if (false !== strpos($stuff_to_parse, $splitter)) {
3522                     if (!$this->use_classes) {
3523                         $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
3524                     } else {
3525                         $attributes = ' class="me' . $key . '"';
3526                     }
3527                     $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
3528                 }
3529             }
3530         }
3531 
3532         //
3533         // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
3534         // You try it, and see what happens ;)
3535         // TODO: Fix lexic permissions not converting entities if shouldn't
3536         // be highlighting regardless
3537         //
3538         if ($this->lexic_permissions['BRACKETS']) {
3539             $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
3540                               $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
3541         }
3542 
3543 
3544         //FIX for symbol highlighting ...
3545         if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
            //Get all matches and throw away those within a block that is already highlighted... (i.e. matched by a regexp)
3547             $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
3548             $global_offset = 0;
3549             for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
3550                 $symbol_match = $pot_symbols[$s_id][0][0];
3551                 if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
3552                     // already highlighted blocks _must_ include either < or >
3553                     // so if this conditional applies, we have to skip this match
3554                     // BenBE: UNLESS the block contains <SEMI> or <PIPE>
3555                     if(strpos($symbol_match, '<SEMI>') === false &&
3556                         strpos($symbol_match, '<PIPE>') === false) {
3557                         continue;
3558                     }
3559                 }
3560 
3561                 // if we reach this point, we have a valid match which needs to be highlighted
3562 
3563                 $symbol_length = strlen($symbol_match);
3564                 $symbol_offset = $pot_symbols[$s_id][0][1];
3565                 unset($pot_symbols[$s_id]);
3566                 $symbol_hl = "";
3567 
3568                 // if we have multiple styles, we have to handle them properly
3569                 if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
3570                     $old_sym = -1;
3571                     // Split the current stuff to replace into its atomic symbols ...
3572                     preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
3573                     foreach ($sym_match_syms[0] as $sym_ms) {
                        //Check if consecutive symbols belong to the same group to save output ...
3575                         if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
3576                             && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
3577                             if (-1 != $old_sym) {
3578                                 $symbol_hl .= "|>";
3579                             }
3580                             $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
3581                             if (!$this->use_classes) {
3582                                 $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
3583                             } else {
3584                                 $symbol_hl .= '<| class="sy' . $old_sym . '">';
3585                             }
3586                         }
3587                         $symbol_hl .= $sym_ms;
3588                     }
3589                     unset($sym_match_syms);
3590 
3591                     //Close remaining tags and insert the replacement at the right position ...
3592                     //Take caution if symbol_hl is empty to avoid doubled closing spans.
3593                     if (-1 != $old_sym) {
3594                         $symbol_hl .= "|>";
3595                     }
3596                 } else {
3597                     if (!$this->use_classes) {
3598                         $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
3599                     } else {
3600                         $symbol_hl = '<| class="sy0">';
3601                     }
3602                     $symbol_hl .= $symbol_match . '|>';
3603                 }
3604 
3605                 $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
3606 
3607                 // since we replace old text with something of different size,
3608                 // we'll have to keep track of the differences
3609                 $global_offset += strlen($symbol_hl) - $symbol_length;
3610             }
3611         }
3612         //FIX for symbol highlighting ...
3613 
3614         // Add class/style for regexps
3615         foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3616             if ($this->lexic_permissions['REGEXPS'][$key]) {
3617                 if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
3618                     $this->_rx_key = $key;
3619                     $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
3620                         array($this, 'handle_regexps_callback'),
3621                         $stuff_to_parse);
3622                 } else {
3623                     if (!$this->use_classes) {
3624                         $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
3625                     } else {
3626                         if (is_array($this->language_data['REGEXPS'][$key]) &&
3627                             array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
3628                             $attributes = ' class="' .
3629                                 $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
3630                         } else {
3631                            $attributes = ' class="re' . $key . '"';
3632                         }
3633                     }
3634                     $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
3635                 }
3636             }
3637         }
3638 
3639         // Replace <DOT> with . for urls
3640         $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
3641         // Replace <|UR1| with <a href= for urls also
3642         if (isset($this->link_styles[GESHI_LINK])) {
3643             if ($this->use_classes) {
3644                 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3645             } else {
3646                 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
3647             }
3648         } else {
3649             $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3650         }
3651 
3652         //
3653         // NOW we add the span thingy ;)
3654         //
3655 
3656         $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
3657         $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
3658         return substr($stuff_to_parse, 1);
3659     }
3660 
3661     /**
3662      * Sets the time taken to parse the code
3663      *
     * @param string $start_time The time when parsing started, in the "msec sec" string form returned by microtime()
     * @param string $end_time   The time when parsing ended, in the "msec sec" string form returned by microtime()
3666      * @since 1.0.2
3667      */
3668     protected function set_time($start_time, $end_time) {
3669         $start = explode(' ', $start_time);
3670         $end = explode(' ', $end_time);
3671         $this->time = $end[0] + $end[1] - $start[0] - $start[1];
3672     }
3673 
3674     /**
3675      * Gets the time taken to parse the code
3676      *
3677      * @return double The time taken to parse the code
3678      * @since  1.0.2
3679      */
3680     public function get_time() {
3681         return $this->time;
3682     }
3683 
3684     /**
3685      * Merges arrays recursively, overwriting values of the first array with values of later arrays
3686      *
3687      * @since 1.0.8
3688      */
3689     protected function merge_arrays() {
3690         $arrays = func_get_args();
3691         $narrays = count($arrays);
3692 
3693         // check arguments
3694         // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array)
3695         for ($i = 0; $i < $narrays; $i ++) {
3696             if (!is_array($arrays[$i])) {
3697                 // also array_merge_recursive returns nothing in this case
3698                 trigger_error('Argument #' . ($i+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
3699                 return false;
3700             }
3701         }
3702 
3703         // the first array is in the output set in every case
3704         $ret = $arrays[0];
3705 
3706         // merege $ret with the remaining arrays
3707         for ($i = 1; $i < $narrays; $i ++) {
3708             foreach ($arrays[$i] as $key => $value) {
3709                 if (is_array($value) && isset($ret[$key])) {
3710                     // if $ret[$key] is not an array you try to merge an scalar value with an array - the result is not defined (incompatible arrays)
3711                     // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false.
3712                     $ret[$key] = $this->merge_arrays($ret[$key], $value);
3713                 } else {
3714                     $ret[$key] = $value;
3715                 }
3716             }
3717         }
3718 
3719         return $ret;
3720     }
3721 
3722     /**
3723      * Gets language information and stores it for later use
3724      *
3725      * @param string $file_name The filename of the language file you want to load
3726      * @since 1.0.0
3727      * @todo Needs to load keys for lexic permissions for keywords, regexps etc
3728      */
3729     protected function load_language($file_name) {
3730         if ($file_name == $this->loaded_language) {
3731             // this file is already loaded!
3732             return;
3733         }
3734 
3735         //Prepare some stuff before actually loading the language file
3736         $this->loaded_language = $file_name;
3737         $this->parse_cache_built = false;
3738         $this->enable_highlighting();
3739         $language_data = array();
3740 
3741         //Load the language file
3742         require $file_name;
3743 
3744         // Perhaps some checking might be added here later to check that
3745         // $language data is a valid thing but maybe not
3746         $this->language_data = $language_data;
3747 
3748         // Set strict mode if should be set
3749         $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];
3750 
3751         // Set permissions for all lexics to true
3752         // so they'll be highlighted by default
3753         foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
3754             if (!empty($this->language_data['KEYWORDS'][$key])) {
3755                 $this->lexic_permissions['KEYWORDS'][$key] = true;
3756             } else {
3757                 $this->lexic_permissions['KEYWORDS'][$key] = false;
3758             }
3759         }
3760 
3761         foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
3762             $this->lexic_permissions['COMMENTS'][$key] = true;
3763         }
3764         foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3765             $this->lexic_permissions['REGEXPS'][$key] = true;
3766         }
3767 
3768         // for BenBE and future code reviews:
3769         // we can use empty here since we only check for existance and emptiness of an array
3770         // if it is not an array at all but rather false or null this will work as intended as well
3771         // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
3772         if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
3773             foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
3774                 // it's either true or false and maybe is true as well
3775                 $perm = $value !== GESHI_NEVER;
3776                 if ($flag == 'ALL') {
3777                     $this->enable_highlighting($perm);
3778                     continue;
3779                 }
3780                 if (!isset($this->lexic_permissions[$flag])) {
3781                     // unknown lexic permission
3782                     continue;
3783                 }
3784                 if (is_array($this->lexic_permissions[$flag])) {
3785                     foreach ($this->lexic_permissions[$flag] as $key => $val) {
3786                         $this->lexic_permissions[$flag][$key] = $perm;
3787                     }
3788                 } else {
3789                     $this->lexic_permissions[$flag] = $perm;
3790                 }
3791             }
3792             unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
3793         }
3794 
3795         //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
3796         //You need to set one for HARDESCAPES only in this case.
3797         if(!isset($this->language_data['HARDCHAR'])) {
3798             $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
3799         }
3800 
3801         //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
3802         $style_filename = substr($file_name, 0, -4) . '.style.php';
3803         if (is_readable($style_filename)) {
3804             //Clear any style_data that could have been set before ...
3805             if (isset($style_data)) {
3806                 unset($style_data);
3807             }
3808 
3809             //Read the Style Information from the style file
3810             include $style_filename;
3811 
3812             //Apply the new styles to our current language styles
3813             if (isset($style_data) && is_array($style_data)) {
3814                 $this->language_data['STYLES'] =
3815                     $this->merge_arrays($this->language_data['STYLES'], $style_data);
3816             }
3817         }
3818     }
3819 
3820     /**
3821      * Takes the parsed code and various options, and creates the HTML
3822      * surrounding it to make it look nice.
3823      *
3824      * @param  string $parsed_code The code already parsed (reference!)
3825      * @since  1.0.0
3826      */
3827     protected function finalise(&$parsed_code) {
3828         // Remove end parts of important declarations
3829         // This is BUGGY!! My fault for bad code: fix coming in 1.2
3830         // @todo Remove this crap
3831         if ($this->enable_important_blocks &&
3832             (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
3833             $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
3834         }
3835 
3836         // Add HTML whitespace stuff if we're using the <div> header
3837         if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
3838             $this->indent($parsed_code);
3839         }
3840 
3841         // purge some unnecessary stuff
3842         /** NOTE: memorypeak #1 */
3843         $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);
3844 
3845         // If we are using IDs for line numbers, there needs to be an overall
3846         // ID set to prevent collisions.
3847         if ($this->add_ids && !$this->overall_id) {
3848             $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
3849         }
3850 
3851         // Get code into lines
3852         /** NOTE: memorypeak #2 */
3853         $code = explode("\n", $parsed_code);
3854         $parsed_code = $this->header();
3855 
3856         // If we're using line numbers, we insert <li>s and appropriate
3857         // markup to style them (otherwise we don't need to do anything)
3858         if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
3859             // If we're using the <pre> header, we shouldn't add newlines because
3860             // the <pre> will line-break them (and the <li>s already do this for us)
3861             $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';
3862 
3863             // Foreach line...
3864             for ($i = 0, $n = count($code); $i < $n;) {
3865                 //Reset the attributes for a new line ...
3866                 $attrs = array();
3867 
3868                 // Make lines have at least one space in them if they're empty
3869                 // BenBE: Checking emptiness using trim instead of relying on blanks
3870                 if ('' == trim($code[$i])) {
3871                     $code[$i] = '&nbsp;';
3872                 }
3873 
3874                 // If this is a "special line"...
3875                 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3876                     $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3877                     // Set the attributes to style the line
3878                     if ($this->use_classes) {
3879                         //$attr = ' class="li2"';
3880                         $attrs['class'][] = 'li2';
3881                         $def_attr = ' class="de2"';
3882                     } else {
3883                         //$attr = ' style="' . $this->line_style2 . '"';
3884                         $attrs['style'][] = $this->line_style2;
3885                         // This style "covers up" the special styles set for special lines
3886                         // so that styles applied to special lines don't apply to the actual
3887                         // code on that line
3888                         $def_attr = ' style="' . $this->code_style . '"';
3889                     }
3890                 } else {
3891                     if ($this->use_classes) {
3892                         //$attr = ' class="li1"';
3893                         $attrs['class'][] = 'li1';
3894                         $def_attr = ' class="de1"';
3895                     } else {
3896                         //$attr = ' style="' . $this->line_style1 . '"';
3897                         $attrs['style'][] = $this->line_style1;
3898                         $def_attr = ' style="' . $this->code_style . '"';
3899                     }
3900                 }
3901 
3902                 //Check which type of tag to insert for this line
3903                 if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3904                     $start = "<pre$def_attr>";
3905                     $end = '</pre>';
3906                 } else {
3907                     // Span or div?
3908                     $start = "<div$def_attr>";
3909                     $end = '</div>';
3910                 }
3911 
3912                 ++$i;
3913 
3914                 // Are we supposed to use ids? If so, add them
3915                 if ($this->add_ids) {
3916                     $attrs['id'][] = "$this->overall_id-$i";
3917                 }
3918 
3919                 //Is this some line with extra styles???
3920                 if (in_array($i, $this->highlight_extra_lines)) {
3921                     if ($this->use_classes) {
3922                         if (isset($this->highlight_extra_lines_styles[$i])) {
3923                             $attrs['class'][] = "lx$i";
3924                         } else {
3925                             $attrs['class'][] = "ln-xtra";
3926                         }
3927                     } else {
3928                         array_push($attrs['style'], $this->get_line_style($i));
3929                     }
3930                 }
3931 
3932                 // Add in the line surrounded by appropriate list HTML
3933                 $attr_string = '';
3934                 foreach ($attrs as $key => $attr) {
3935                     $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
3936                 }
3937 
3938                 $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
3939                 unset($code[$i - 1]);
3940             }
3941         } else {
3942             $n = count($code);
3943             if ($this->use_classes) {
3944                 $attributes = ' class="de1"';
3945             } else {
3946                 $attributes = ' style="'. $this->code_style .'"';
3947             }
3948             if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3949                 $parsed_code .= '<pre'. $attributes .'>';
3950             } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
3951                 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3952                     if ($this->use_classes) {
3953                         $attrs = ' class="ln"';
3954                     } else {
3955                         $attrs = ' style="'. $this->table_linenumber_style .'"';
3956                     }
3957                     $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
3958                     // get linenumbers
3959                     // we don't merge it with the for below, since it should be better for
3960                     // memory consumption this way
3961                     // @todo: but... actually it would still be somewhat nice to merge the two loops
3962                     //        the mem peaks are at different positions
3963                     for ($i = 0; $i < $n; ++$i) {
3964                         $close = 0;
3965                         // fancy lines
3966                         if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3967                             $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3968                             // Set the attributes to style the line
3969                             if ($this->use_classes) {
3970                                 $parsed_code .= '<span class="xtra li2"><span class="de2">';
3971                             } else {
3972                                 // This style "covers up" the special styles set for special lines
3973                                 // so that styles applied to special lines don't apply to the actual
3974                                 // code on that line
3975                                 $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
3976                                                   .'<span style="' . $this->code_style .'">';
3977                             }
3978                             $close += 2;
3979                         }
3980                         //Is this some line with extra styles???
3981                         if (in_array($i + 1, $this->highlight_extra_lines)) {
3982                             if ($this->use_classes) {
3983                                 if (isset($this->highlight_extra_lines_styles[$i])) {
3984                                     $parsed_code .= "<span class=\"xtra lx$i\">";
3985                                 } else {
3986                                     $parsed_code .= "<span class=\"xtra ln-xtra\">";
3987                                 }
3988                             } else {
3989                                 $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
3990                             }
3991                             ++$close;
3992                         }
3993                         $parsed_code .= $this->line_numbers_start + $i;
3994                         if ($close) {
3995                             $parsed_code .= str_repeat('</span>', $close);
3996                         } elseif ($i != $n) {
3997                             $parsed_code .= "\n";
3998                         }
3999                     }
4000                     $parsed_code .= '</pre></td><td'.$attributes.'>';
4001                 }
4002                 $parsed_code .= '<pre'. $attributes .'>';
4003             }
4004             // No line numbers, but still need to handle highlighting lines extra.
4005             // Have to use divs so the full width of the code is highlighted
4006             $close = 0;
4007             for ($i = 0; $i < $n; ++$i) {
4008                 // Make lines have at least one space in them if they're empty
4009                 // BenBE: Checking emptiness using trim instead of relying on blanks
4010                 if ('' == trim($code[$i])) {
4011                     $code[$i] = '&nbsp;';
4012                 }
4013                 // fancy lines
4014                 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
4015                     $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
4016                     // Set the attributes to style the line
4017                     if ($this->use_classes) {
4018                         $parsed_code .= '<span class="xtra li2"><span class="de2">';
4019                     } else {
4020                         // This style "covers up" the special styles set for special lines
4021                         // so that styles applied to special lines don't apply to the actual
4022                         // code on that line
4023                         $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
4024                                           .'<span style="' . $this->code_style .'">';
4025                     }
4026                     $close += 2;
4027                 }
4028                 //Is this some line with extra styles???
4029                 if (in_array($i + 1, $this->highlight_extra_lines)) {
4030                     if ($this->use_classes) {
4031                         if (isset($this->highlight_extra_lines_styles[$i])) {
4032                             $parsed_code .= "<span class=\"xtra lx$i\">";
4033                         } else {
4034                             $parsed_code .= "<span class=\"xtra ln-xtra\">";
4035                         }
4036                     } else {
4037                         $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
4038                     }
4039                     ++$close;
4040                 }
4041 
4042                 $parsed_code .= $code[$i];
4043 
4044                 if ($close) {
4045                   $parsed_code .= str_repeat('</span>', $close);
4046                   $close = 0;
4047                 }
4048                 if ($i + 1 < $n) {
4049                     $parsed_code .= "\n";
4050                 }
4051                 unset($code[$i]);
4052             }
4053 
4054             if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
4055                 $parsed_code .= '</pre>';
4056             }
4057             if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4058                 $parsed_code .= '</td>';
4059             }
4060         }
4061 
4062         $parsed_code .= $this->footer();
4063     }
4064 
4065     /**
4066      * Creates the header for the code block (with correct attributes)
4067      *
4068      * @return string The header for the code block
4069      * @since  1.0.0
4070      */
4071     protected function header() {
4072         // Get attributes needed
4073         /**
4074          * @todo   Document behaviour change - class is outputted regardless of whether
4075          *         we're using classes or not. Same with style
4076          */
4077         $attributes = ' class="' . $this->_genCSSName($this->language);
4078         if ($this->overall_class != '') {
4079             $attributes .= " ".$this->_genCSSName($this->overall_class);
4080         }
4081         $attributes .= '"';
4082 
4083         if ($this->overall_id != '') {
4084             $attributes .= " id=\"{$this->overall_id}\"";
4085         }
4086         if ($this->overall_style != '' && !$this->use_classes) {
4087             $attributes .= ' style="' . $this->overall_style . '"';
4088         }
4089 
4090         $ol_attributes = '';
4091 
4092         if ($this->line_numbers_start != 1) {
4093             $ol_attributes .= ' start="' . $this->line_numbers_start . '"';
4094         }
4095 
4096         // Get the header HTML
4097         $header = $this->header_content;
4098         if ($header) {
4099             if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
4100                 $header = str_replace("\n", '', $header);
4101             }
4102             $header = $this->replace_keywords($header);
4103 
4104             if ($this->use_classes) {
4105                 $attr = ' class="head"';
4106             } else {
4107                 $attr = " style=\"{$this->header_content_style}\"";
4108             }
4109             if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4110                 $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
4111             } else {
4112                 $header = "<div$attr>$header</div>";
4113             }
4114         }
4115 
4116         if (GESHI_HEADER_NONE == $this->header_type) {
4117             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4118                 return "$header<ol$attributes$ol_attributes>";
4119             }
4120             return $header . ($this->force_code_block ? '<div>' : '');
4121         }
4122 
4123         // Work out what to return and do it
4124         if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4125             if ($this->header_type == GESHI_HEADER_PRE) {
4126                 return "<pre$attributes>$header<ol$ol_attributes>";
4127             } elseif ($this->header_type == GESHI_HEADER_DIV ||
4128                 $this->header_type == GESHI_HEADER_PRE_VALID) {
4129                 return "<div$attributes>$header<ol$ol_attributes>";
4130             } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
4131                 return "<table$attributes>$header<tbody><tr class=\"li1\">";
4132             }
4133         } else {
4134             if ($this->header_type == GESHI_HEADER_PRE) {
4135                 return "<pre$attributes>$header"  .
4136                     ($this->force_code_block ? '<div>' : '');
4137             } else {
4138                 return "<div$attributes>$header" .
4139                     ($this->force_code_block ? '<div>' : '');
4140             }
4141         }
4142     }
4143 
4144     /**
4145      * Returns the footer for the code block.
4146      *
4147      * @return string The footer for the code block
4148      * @since  1.0.0
4149      */
4150     protected function footer() {
4151         $footer = $this->footer_content;
4152         if ($footer) {
4153             if ($this->header_type == GESHI_HEADER_PRE) {
4154                 $footer = str_replace("\n", '', $footer);;
4155             }
4156             $footer = $this->replace_keywords($footer);
4157 
4158             if ($this->use_classes) {
4159                 $attr = ' class="foot"';
4160             } else {
4161                 $attr = " style=\"{$this->footer_content_style}\"";
4162             }
4163             if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4164                 $footer = "<tfoot><tr><td colspan=\"2\">$footer</td></tr></tfoot>";
4165             } else {
4166                 $footer = "<div$attr>$footer</div>";
4167             }
4168         }
4169 
4170         if (GESHI_HEADER_NONE == $this->header_type) {
4171             return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
4172         }
4173 
4174         if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
4175             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4176                 return "</ol>$footer</div>";
4177             }
4178             return ($this->force_code_block ? '</div>' : '') .
4179                 "$footer</div>";
4180         }
4181         elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
4182             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4183                 return "</tr></tbody>$footer</table>";
4184             }
4185             return ($this->force_code_block ? '</div>' : '') .
4186                 "$footer</div>";
4187         }
4188         else {
4189             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4190                 return "</ol>$footer</pre>";
4191             }
4192             return ($this->force_code_block ? '</div>' : '') .
4193                 "$footer</pre>";
4194         }
4195     }
4196 
4197     /**
4198      * Replaces certain keywords in the header and footer with
4199      * certain configuration values
4200      *
4201      * @param  string $instr The header or footer content to do replacement on
4202      * @return string The header or footer with replaced keywords
4203      * @since  1.0.2
4204      */
4205     protected function replace_keywords($instr) {
4206         $keywords = $replacements = array();
4207 
4208         $keywords[] = '<TIME>';
4209         $keywords[] = '{TIME}';
4210         $replacements[] = $replacements[] = number_format($time = $this->get_time(), 3);
4211 
4212         $keywords[] = '<LANGUAGE>';
4213         $keywords[] = '{LANGUAGE}';
4214         $replacements[] = $replacements[] = $this->language_data['LANG_NAME'];
4215 
4216         $keywords[] = '<VERSION>';
4217         $keywords[] = '{VERSION}';
4218         $replacements[] = $replacements[] = GESHI_VERSION;
4219 
4220         $keywords[] = '<SPEED>';
4221         $keywords[] = '{SPEED}';
4222         if ($time <= 0) {
4223             $speed = 'N/A';
4224         } else {
4225             $speed = strlen($this->source) / $time;
4226             if ($speed >= 1024) {
4227                 $speed = sprintf("%.2f KB/s", $speed / 1024.0);
4228             } else {
4229                 $speed = sprintf("%.0f B/s", $speed);
4230             }
4231         }
4232         $replacements[] = $replacements[] = $speed;
4233 
4234         return str_replace($keywords, $replacements, $instr);
4235     }
4236 
4237     /**
4238      * Secure replacement for PHP built-in function htmlspecialchars().
4239      *
4240      * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
4241      * for this replacement function.
4242      *
4243      * The INTERFACE for this function is almost the same as that for
4244      * htmlspecialchars(), with the same default for quote style; however, there
4245      * is no 'charset' parameter. The reason for this is as follows:
4246      *
4247      * The PHP docs say:
4248      *      "The third argument charset defines character set used in conversion."
4249      *
4250      * I suspect PHP's htmlspecialchars() is working at the byte-value level and
4251      * thus _needs_ to know (or asssume) a character set because the special
4252      * characters to be replaced could exist at different code points in
4253      * different character sets. (If indeed htmlspecialchars() works at
4254      * byte-value level that goes some  way towards explaining why the
4255      * vulnerability would exist in this function, too, and not only in
4256      * htmlentities() which certainly is working at byte-value level.)
4257      *
4258      * This replacement function however works at character level and should
4259      * therefore be "immune" to character set differences - so no charset
4260      * parameter is needed or provided. If a third parameter is passed, it will
4261      * be silently ignored.
4262      *
4263      * In the OUTPUT there is a minor difference in that we use '&#39;' instead
4264      * of PHP's '&#039;' for a single quote: this provides compatibility with
4265      *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
4266      * (see comment by mikiwoz at yahoo dot co dot uk on
4267      * http://php.net/htmlspecialchars); it also matches the entity definition
4268      * for XML 1.0
4269      * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
4270      * Like PHP we use a numeric character reference instead of '&apos;' for the
4271      * single quote. For the other special characters we use the named entity
4272      * references, as PHP is doing.
4273      *
4274      * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
4275      *
4276      * @license     http://www.gnu.org/copyleft/lgpl.html
4277      *              GNU Lesser General Public License
4278      * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
4279      *              Wikka Development Team}
4280      *
4281      * @param       string  $string string to be converted
4282      * @param       integer $quote_style
4283      *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
4284      *                      - ENT_NOQUOTES: escapes only &, < and >
4285      *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
4286      * @return      string  converted string
4287      * @since       1.0.7.18
4288      */
4289     protected function hsc($string, $quote_style = ENT_COMPAT) {
4290         // init
4291         static $aTransSpecchar = array(
4292             '&' => '&amp;',
4293             '"' => '&quot;',
4294             '<' => '&lt;',
4295             '>' => '&gt;',
4296 
4297             //This fix is related to SF#1923020, but has to be applied
4298             //regardless of actually highlighting symbols.
4299 
4300             //Circumvent a bug with symbol highlighting
4301             //This is required as ; would produce undesirable side-effects if it
4302             //was not to be processed as an entity.
4303             ';' => '<SEMI>', // Force ; to be processed as entity
4304             '|' => '<PIPE>' // Force | to be processed as entity
4305             );                      // ENT_COMPAT set
4306 
4307         switch ($quote_style) {
4308             case ENT_NOQUOTES: // don't convert double quotes
4309                 unset($aTransSpecchar['"']);
4310                 break;
4311             case ENT_QUOTES: // convert single quotes as well
4312                 $aTransSpecchar["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
4313                 break;
4314         }
4315 
4316         // return translated string
4317         return strtr($string, $aTransSpecchar);
4318     }
4319 
4320     /**
4321      * Generate a CSS class name from a given string.
4322      * Prevents invalid CSS classes.
4323      *
4324      * @param string $name Proposed class name
4325      *
4326      * @return string Safe CSS class name
4327      */
4328     protected function _genCSSName($name) {
4329         return (is_numeric($name[0]) ? '_' : '') . $name;
4330     }
4331 
4332     /**
4333      * Returns a stylesheet for the highlighted code. If $economy mode
4334      * is true, we only return the stylesheet declarations that matter for
4335      * this code block instead of the whole thing
4336      *
4337      * @param  boolean $economy_mode Whether to use economy mode or not
4338      * @return string A stylesheet built on the data for the current language
4339      * @since  1.0.0
4340      */
4341     public function get_stylesheet($economy_mode = true) {
4342         // If there's an error, chances are that the language file
4343         // won't have populated the language data file, so we can't
4344         // risk getting a stylesheet...
4345         if ($this->error) {
4346             return '';
4347         }
4348 
4349         //Check if the style rearrangements have been processed ...
4350         //This also does some preprocessing to check which style groups are useable ...
4351         if(!isset($this->language_data['NUMBERS_CACHE'])) {
4352             $this->build_style_cache();
4353         }
4354 
4355         // First, work out what the selector should be. If there's an ID,
4356         // that should be used, the same for a class. Otherwise, a selector
4357         // of '' means that these styles will be applied anywhere
4358         if ($this->overall_id) {
4359             $selector = '#' . $this->_genCSSName($this->overall_id);
4360         } else {
4361             $selector = '.' . $this->_genCSSName($this->language);
4362             if ($this->overall_class) {
4363                 $selector .= '.' . $this->_genCSSName($this->overall_class);
4364             }
4365         }
4366         $selector .= ' ';
4367 
4368         // Header of the stylesheet
4369         if (!$economy_mode) {
4370             $stylesheet = "/**\n".
4371                 " * GeSHi Dynamically Generated Stylesheet\n".
4372                 " * --------------------------------------\n".
4373                 " * Dynamically generated stylesheet for {$this->language}\n".
4374                 " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
4375                 " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2014 Benny Baumann\n" .
4376                 " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4377                 " * --------------------------------------\n".
4378                 " */\n";
4379         } else {
4380             $stylesheet = "/**\n".
4381                 " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2014 Benny Baumann\n" .
4382                 " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4383                 " */\n";
4384         }
4385 
4386         // Set the <ol> to have no effect at all if there are line numbers
4387         // (<ol>s have margins that should be destroyed so all layout is
4388         // controlled by the set_overall_style method, which works on the
4389         // <pre> or <div> container). Additionally, set default styles for lines
4390         if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4391             //$stylesheet .= "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
4392             $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
4393         }
4394 
4395         // Add overall styles
4396         // note: neglect economy_mode, empty styles are meaningless
4397         if ($this->overall_style != '') {
4398             $stylesheet .= "$selector {{$this->overall_style}}\n";
4399         }
4400 
4401         // Add styles for links
4402         // note: economy mode does not make _any_ sense here
4403         //       either the style is empty and thus no selector is needed
4404         //       or the appropriate key is given.
4405         foreach ($this->link_styles as $key => $style) {
4406             if ($style != '') {
4407                 switch ($key) {
4408                     case GESHI_LINK:
4409                         $stylesheet .= "{$selector}a:link {{$style}}\n";
4410                         break;
4411                     case GESHI_HOVER:
4412                         $stylesheet .= "{$selector}a:hover {{$style}}\n";
4413                         break;
4414                     case GESHI_ACTIVE:
4415                         $stylesheet .= "{$selector}a:active {{$style}}\n";
4416                         break;
4417                     case GESHI_VISITED:
4418                         $stylesheet .= "{$selector}a:visited {{$style}}\n";
4419                         break;
4420                 }
4421             }
4422         }
4423 
4424         // Header and footer
4425         // note: neglect economy_mode, empty styles are meaningless
4426         if ($this->header_content_style != '') {
4427             $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
4428         }
4429         if ($this->footer_content_style != '') {
4430             $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
4431         }
4432 
4433         // Styles for important stuff
4434         // note: neglect economy_mode, empty styles are meaningless
4435         if ($this->important_styles != '') {
4436             $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
4437         }
4438 
4439         // Simple line number styles
4440         if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->line_style1 != '') {
4441             $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
4442         }
4443         if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->table_linenumber_style != '') {
4444             $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
4445         }
4446         // If there is a style set for fancy line numbers, echo it out
4447         if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
4448             $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
4449         }
4450 
4451         // note: empty styles are meaningless
4452         foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
4453             if ($styles != '' && (!$economy_mode ||
4454                 (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
4455                 $this->lexic_permissions['KEYWORDS'][$group]))) {
4456                 $stylesheet .= "$selector.kw$group {{$styles}}\n";
4457             }
4458         }
4459         foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
4460             if ($styles != '' && (!$economy_mode ||
4461                 (isset($this->lexic_permissions['COMMENTS'][$group]) &&
4462                 $this->lexic_permissions['COMMENTS'][$group]) ||
4463                 (!empty($this->language_data['COMMENT_REGEXP']) &&
4464                 !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
4465                 $stylesheet .= "$selector.co$group {{$styles}}\n";
4466             }
4467         }
4468         foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
4469             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['ESCAPE_CHAR'])) {
4470                 // NEW: since 1.0.8 we have to handle hardescapes
4471                 if ($group === 'HARD') {
4472                     $group = '_h';
4473                 }
4474                 $stylesheet .= "$selector.es$group {{$styles}}\n";
4475             }
4476         }
4477         foreach ($this->language_data['STYLES']['BRACKETS'] as $group => $styles) {
4478             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['BRACKETS'])) {
4479                 $stylesheet .= "$selector.br$group {{$styles}}\n";
4480             }
4481         }
4482         foreach ($this->language_data['STYLES']['SYMBOLS'] as $group => $styles) {
4483             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['SYMBOLS'])) {
4484                 $stylesheet .= "$selector.sy$group {{$styles}}\n";
4485             }
4486         }
4487         foreach ($this->language_data['STYLES']['STRINGS'] as $group => $styles) {
4488             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['STRINGS'])) {
4489                 // NEW: since 1.0.8 we have to handle hardquotes
4490                 if ($group === 'HARD') {
4491                     $group = '_h';
4492                 }
4493                 $stylesheet .= "$selector.st$group {{$styles}}\n";
4494             }
4495         }
4496         foreach ($this->language_data['STYLES']['NUMBERS'] as $group => $styles) {
4497             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['NUMBERS'])) {
4498                 $stylesheet .= "$selector.nu$group {{$styles}}\n";
4499             }
4500         }
4501         foreach ($this->language_data['STYLES']['METHODS'] as $group => $styles) {
4502             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['METHODS'])) {
4503                 $stylesheet .= "$selector.me$group {{$styles}}\n";
4504             }
4505         }
4506         // note: neglect economy_mode, empty styles are meaningless
4507         foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
4508             if ($styles != '') {
4509                 $stylesheet .= "$selector.sc$group {{$styles}}\n";
4510             }
4511         }
4512         foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
4513             if ($styles != '' && (!$economy_mode ||
4514                 (isset($this->lexic_permissions['REGEXPS'][$group]) &&
4515                 $this->lexic_permissions['REGEXPS'][$group]))) {
4516                 if (is_array($this->language_data['REGEXPS'][$group]) &&
4517                     array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
4518                     $stylesheet .= "$selector.";
4519                     $stylesheet .= $this->language_data['REGEXPS'][$group][GESHI_CLASS];
4520                     $stylesheet .= " {{$styles}}\n";
4521                 } else {
4522                     $stylesheet .= "$selector.re$group {{$styles}}\n";
4523                 }
4524             }
4525         }
4526         // Styles for lines being highlighted extra
4527         if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
4528             $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
4529         }
4530         $stylesheet .= "{$selector}span.xtra { display:block; }\n";
4531         foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
4532             $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
4533         }
4534 
4535         return $stylesheet;
4536     }
4537 
4538     /**
4539      * Get's the style that is used for the specified line
4540      *
4541      * @param int $line The line number information is requested for
4542      * @since 1.0.7.21
4543      */
4544     protected function get_line_style($line) {
4545         $style = null;
4546         if (isset($this->highlight_extra_lines_styles[$line])) {
4547             $style = $this->highlight_extra_lines_styles[$line];
4548         } else { // if no "extra" style assigned
4549             $style = $this->highlight_extra_lines_style;
4550         }
4551 
4552         return $style;
4553     }
4554 
    /**
    * Creates an optimized list of regular expression fragments which
    * together match the strings of the given array.
    *
    * The strings are sorted, quoted with preg_quote() and merged into a
    * nested token array keyed by the prefixes consecutive entries have in
    * common. The token array is turned into regexp text by
    * _optimize_regexp_list_tokens_to_string(). A new element of the
    * returned list is started when the collected token length exceeds
    * GESHI_MAX_PCRE_LENGTH, or when appending another chunk would push the
    * number of "(?:" groups past GESHI_MAX_PCRE_SUBPATTERNS (if that
    * constant is non-zero).
    *
    * Example (as given by the original author):
    * <code>$list = array('faa', 'foo', 'foobar');
    *          => string 'f(aa|oo(bar)?)'</code>
    *
    * @param array  $list             array of (unquoted) strings
    * @param string $regexp_delimiter your regular expression delimiter, @see preg_quote()
    * @return array list of regular expression strings
    * @author Milian Wolff <mail@milianw.de>
    * @since 1.0.8
    */
    protected function optimize_regexp_list($list, $regexp_delimiter = '/') {
        // characters which must not end up as the first character of a split key part
        $regex_chars = array('.', '\\', '+', '-', '*', '?', '[', '^', ']', '$',
            '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
        // only the keys of the previous entry are compared below, so sort the
        // list to bring entries with a common prefix next to each other
        sort($list);
        $regexp_list = array('');
        $num_subpatterns = 0;
        $list_key = 0;

        // the tokens which we will use to generate the regexp list
        $tokens = array();
        // key parts of the previously inserted entry, one per nesting level
        $prev_keys = array();
        // go through all entries of the list and generate the token list
        $cur_len = 0;
        for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
            if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
                // seems like the length of this pcre is growing exorbitantly
                // NOTE(review): the list key is incremented before assigning, so if
                // nothing was appended to the current element yet it keeps its
                // initial empty string - confirm callers tolerate that
                $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
                $tokens = array();
                $cur_len = 0;
            }
            $level = 0;
            $entry = preg_quote((string) $list[$i], $regexp_delimiter);
            // $pointer walks down the token array by reference
            $pointer = &$tokens;
            // properly assign the new entry to the correct position in the token array
            // possibly generate smaller common denominator keys
            while (true) {
                // get the common denominator
                if (isset($prev_keys[$level])) {
                    if ($prev_keys[$level] == $entry) {
                        // this is a duplicate entry, skip it
                        continue 2;
                    }
                    // count the leading characters shared with the previous key
                    $char = 0;
                    while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                            && $entry[$char] == $prev_keys[$level][$char]) {
                        ++$char;
                    }
                    if ($char > 0) {
                        // this entry has at least some chars in common with the current key
                        if ($char == strlen($prev_keys[$level])) {
                            // current key is totally matched, i.e. this entry has just some bits appended
                            $pointer = &$pointer[$prev_keys[$level]];
                        } else {
                            // only part of the keys match
                            $new_key_part1 = substr($prev_keys[$level], 0, $char);
                            $new_key_part2 = substr($prev_keys[$level], $char);

                            if (in_array($new_key_part1[0], $regex_chars)
                                || in_array($new_key_part2[0], $regex_chars)) {
                                // this is bad, a regex char as first character
                                // so do not split the key, store the entry as a whole instead
                                $pointer[$entry] = array('' => true);
                                array_splice($prev_keys, $level, count($prev_keys), $entry);
                                $cur_len += strlen($entry);
                                // this re-enters the while loop: the key stored just above
                                // now equals $entry, so the duplicate check at the top
                                // moves on to the next list entry
                                continue;
                            } else {
                                // relocate previous tokens
                                $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                                unset($pointer[$prev_keys[$level]]);
                                $pointer = &$pointer[$new_key_part1];
                                // recreate key index
                                array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                                $cur_len += strlen($new_key_part2);
                            }
                        }
                        // descend one level and retry with the rest of the entry
                        ++$level;
                        $entry = substr($entry, $char);
                        continue;
                    }
                    // else: fall through, i.e. no common denominator was found
                }
                if ($level == 0 && !empty($tokens)) {
                    // we can dump current tokens into the string and throw them away afterwards
                    $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                    $new_subpatterns = substr_count($new_entry, '(?:');
                    if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                        // too many subpatterns for the current element: start a new one
                        $regexp_list[++$list_key] = $new_entry;
                        $num_subpatterns = $new_subpatterns;
                    } else {
                        // append to the current element as a further alternative
                        if (!empty($regexp_list[$list_key])) {
                            $new_entry = '|' . $new_entry;
                        }
                        $regexp_list[$list_key] .= $new_entry;
                        $num_subpatterns += $new_subpatterns;
                    }
                    $tokens = array();
                    $cur_len = 0;
                }
                // no further common denominator found
                $pointer[$entry] = array('' => true);
                array_splice($prev_keys, $level, count($prev_keys), $entry);

                $cur_len += strlen($entry);
                break;
            }
            // this entry has been processed and is not needed any longer
            unset($list[$i]);
        }
        // make sure the last tokens get converted as well
        $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
        if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
            // only move on to a new element if the current one is already in use
            if ( !empty($regexp_list[$list_key]) ) {
              ++$list_key;
            }
            $regexp_list[$list_key] = $new_entry;
        } else {
            if (!empty($regexp_list[$list_key])) {
                $new_entry = '|' . $new_entry;
            }
            $regexp_list[$list_key] .= $new_entry;
        }
        return $regexp_list;
    }
4681 
4682     /**
4683     * this function creates the appropriate regexp string of an token array
4684     * you should not call this function directly, @see $this->optimize_regexp_list().
4685     *
4686     * @param array $tokens   array of tokens
4687     * @param bool  $recursed to know wether we recursed or not
4688     * @return string
4689     * @author Milian Wolff <mail@milianw.de>
4690     * @since 1.0.8
4691     */
4692     protected function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
4693         $list = '';
4694         foreach ($tokens as $token => $sub_tokens) {
4695             $list .= $token;
4696             $close_entry = isset($sub_tokens['']);
4697             unset($sub_tokens['']);
4698             if (!empty($sub_tokens)) {
4699                 $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
4700                 if ($close_entry) {
4701                     // make sub_tokens optional
4702                     $list .= '?';
4703                 }
4704             }
4705             $list .= '|';
4706         }
4707         if (!$recursed) {
4708             // do some optimizations
4709             // common trailing strings
4710             // BUGGY!
4711             //$list = preg_replace_callback('#(?<=^|\:|\|)\w+?(\w+)(?:\|.+\1)+(?=\|)#', create_function(
4712             //    '$matches', 'return "(?:" . preg_replace("#" . preg_quote($matches[1], "#") . "(?=\||$)#", "", $matches[0]) . ")" . $matches[1];'), $list);
4713             // (?:p)? => p?
4714             $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
4715             // (?:a|b|c|d|...)? => [abcd...]?
4716             // TODO: a|bb|c => [ac]|bb
4717             static $callback_2;
4718             if (!isset($callback_2)) {
4719                 $callback_2 = function($matches) {
4720                     return "[" . str_replace("|", "", $matches[1]) . "]";
4721                 };
4722             }
4723             $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#', $callback_2, $list);
4724         }
4725         // return $list without trailing pipe
4726         return substr($list, 0, -1);
4727     }
4728 } // End Class GeSHi
4729 
4730 
if (!function_exists('geshi_highlight')) {
    /**
     * Shortcut for highlighting a piece of code with a single call; meant to
     * behave just like highlight_string.
     *
     * The highlighted code is produced without a GeSHi header
     * (GESHI_HEADER_NONE) and is wrapped in a <code> element.
     *
     * @param string $string   The code to highlight
     * @param string $language The language to highlight the code in
     * @param string $path     The path to the language files. May be left blank: as of
     *                         version 1.0.7 the path should be detected automatically
     * @param boolean $return  Whether to return the result instead of echoing it
     * @return string|boolean  The highlighted code if $return is true. Otherwise the
     *                         code is echoed and the result is false if the GeSHi
     *                         object reports an error, true if it does not
     * @since 1.0.2
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        $markup = '<code>' . $highlighter->parse_code() . '</code>';
        if ($return) {
            return $markup;
        }

        echo $markup;

        // the error state is only reported when the markup was echoed
        return $highlighter->error() ? false : true;
    }
}
4759