1<?php
2/**
3 * GeSHi - Generic Syntax Highlighter
4 *
5 * The GeSHi class for Generic Syntax Highlighting. Please refer to the
6 * documentation at http://qbnz.com/highlighter/documentation.php for more
7 * information about how to use this class.
8 *
9 * For changes, release notes, TODOs etc, see the relevant files in the docs/
10 * directory.
11 *
12 *   This file is part of GeSHi.
13 *
14 *  GeSHi is free software; you can redistribute it and/or modify
15 *  it under the terms of the GNU General Public License as published by
16 *  the Free Software Foundation; either version 2 of the License, or
17 *  (at your option) any later version.
18 *
19 *  GeSHi is distributed in the hope that it will be useful,
20 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22 *  GNU General Public License for more details.
23 *
24 *  You should have received a copy of the GNU General Public License
25 *  along with GeSHi; if not, write to the Free Software
26 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
27 *
28 * @package    geshi
29 * @subpackage core
30 * @author     Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
31 * @copyright  (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2014 Benny Baumann
32 * @license    http://gnu.org/copyleft/gpl.html GNU GPL
33 */
34
35//
36// GeSHi Constants
37// You should use these constant names in your programs instead of
38// their values - you never know when a value may change in a future
39// version
40//
41
42/** The version of this GeSHi file */
43define('GESHI_VERSION', '1.0.9.1');
44
45// Define the root directory for the GeSHi code tree
46if (!defined('GESHI_ROOT')) {
47    /** The root directory for GeSHi */
48    define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
49}
50/** The language file directory for GeSHi
51    @access private */
52define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
53
54// Define if GeSHi should be paranoid about security
55if (!defined('GESHI_SECURITY_PARANOID')) {
56    /** Tells GeSHi to be paranoid about security settings */
57    define('GESHI_SECURITY_PARANOID', false);
58}
59
60// Line numbers - use with enable_line_numbers()
61/** Use no line numbers when building the result */
62define('GESHI_NO_LINE_NUMBERS', 0);
63/** Use normal line numbers when building the result */
64define('GESHI_NORMAL_LINE_NUMBERS', 1);
65/** Use fancy line numbers when building the result */
66define('GESHI_FANCY_LINE_NUMBERS', 2);
67
68// Container HTML type
69/** Use nothing to surround the source */
70define('GESHI_HEADER_NONE', 0);
71/** Use a "div" to surround the source */
72define('GESHI_HEADER_DIV', 1);
73/** Use a "pre" to surround the source */
74define('GESHI_HEADER_PRE', 2);
75/** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
76define('GESHI_HEADER_PRE_VALID', 3);
77/**
78 * Use a "table" to surround the source:
79 *
80 *  <table>
81 *    <thead><tr><td colspan="2">$header</td></tr></thead>
 *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
 *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
84 *  </table>
85 *
86 * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
87 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
88 * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
89 */
90define('GESHI_HEADER_PRE_TABLE', 4);
91
// Capitalisation constants
/** Leave keywords found as the case that they are */
define('GESHI_CAPS_NO_CHANGE', 0);
/** Uppercase keywords found */
define('GESHI_CAPS_UPPER', 1);
/** Lowercase keywords found */
define('GESHI_CAPS_LOWER', 2);
99
100// Link style constants
101/** Links in the source in the :link state */
102define('GESHI_LINK', 0);
103/** Links in the source in the :hover state */
104define('GESHI_HOVER', 1);
105/** Links in the source in the :active state */
106define('GESHI_ACTIVE', 2);
107/** Links in the source in the :visited state */
108define('GESHI_VISITED', 3);
109
110// Important string starter/finisher
111// Note that if you change these, they should be as-is: i.e., don't
112// write them as if they had been run through htmlentities()
113/** The starter for important parts of the source */
114define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
115/** The ender for important parts of the source */
116define('GESHI_END_IMPORTANT', '<END GeSHi>');
117
118/**#@+
119 *  @access private
120 */
121// When strict mode applies for a language
122/** Strict mode never applies (this is the most common) */
123define('GESHI_NEVER', 0);
124/** Strict mode *might* apply, and can be enabled or
125    disabled by {@link GeSHi->enable_strict_mode()} */
126define('GESHI_MAYBE', 1);
127/** Strict mode always applies */
128define('GESHI_ALWAYS', 2);
129
130// Advanced regexp handling constants, used in language files
131/** The key of the regex array defining what to search for */
132define('GESHI_SEARCH', 0);
133/** The key of the regex array defining what bracket group in a
134    matched search to use as a replacement */
135define('GESHI_REPLACE', 1);
136/** The key of the regex array defining any modifiers to the regular expression */
137define('GESHI_MODIFIERS', 2);
138/** The key of the regex array defining what bracket group in a
139    matched search to put before the replacement */
140define('GESHI_BEFORE', 3);
141/** The key of the regex array defining what bracket group in a
142    matched search to put after the replacement */
143define('GESHI_AFTER', 4);
144/** The key of the regex array defining a custom keyword to use
145    for this regexp's html tag class */
146define('GESHI_CLASS', 5);
147
148/** Used in language files to mark comments */
149define('GESHI_COMMENTS', 0);
150
/** Some old PHP / PCRE versions only support a limited number of subpatterns in
    regular expressions. Set this to false if your PCRE lib is up to date
153    @see GeSHi->optimize_regexp_list()
154    **/
155define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
156/** it's also important not to generate too long regular expressions
157    be generous here... but keep in mind, that when reaching this limit we
158    still have to close open patterns. 12k should do just fine on a 16k limit.
159    @see GeSHi->optimize_regexp_list()
160    **/
161define('GESHI_MAX_PCRE_LENGTH', 12288);
162
//Number format specification
//Every value below is a distinct power of two (presumably so that several
//formats can be combined into one bitmask - confirm against the language files).
/** Basic number format for integers */
define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
/** Enhanced number format for integers like seen in C */
define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
/** Number format to highlight binary numbers with a suffix "b" */
define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
/** Number format to highlight binary numbers with a prefix % */
define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
/** Number format to highlight binary numbers with a prefix 0b (C) */
define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
/** Number format to highlight octal numbers with a leading zero */
define('GESHI_NUMBER_OCT_PREFIX', 256);           //0[0-7]+
/** Number format to highlight octal numbers with a prefix 0o (logtalk) */
define('GESHI_NUMBER_OCT_PREFIX_0O', 512);           //0o[0-7]+
/** Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series). */
define('GESHI_NUMBER_OCT_PREFIX_AT', 1024);           //@[0-7]+
/** Number format to highlight octal numbers with a suffix of o */
define('GESHI_NUMBER_OCT_SUFFIX', 2048);           //[0-7]+[oO]
/** Number format to highlight hex numbers with a prefix 0x */
define('GESHI_NUMBER_HEX_PREFIX', 4096);           //0x[0-9a-fA-F]+
/** Number format to highlight hex numbers with a prefix $ */
define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192);           //$[0-9a-fA-F]+
/** Number format to highlight hex numbers with a suffix of h */
define('GESHI_NUMBER_HEX_SUFFIX', 16384);           //[0-9][0-9a-fA-F]*h
/** Number format to highlight floating-point numbers without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI', 65536);          //\d+\.\d+
/** Number format to highlight floating-point numbers with a suffix "f", without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI_F', 131072);       //\d+(\.\d+)?f
/** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);      //\.\d+e\d+
/** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);       //\d+(\.\d+)?e\d+
//Custom formats are passed by RX array
197
198// Error detection - use these to analyse faults
199/** No sourcecode to highlight was specified
200 * @deprecated
201 */
202define('GESHI_ERROR_NO_INPUT', 1);
203/** The language specified does not exist */
204define('GESHI_ERROR_NO_SUCH_LANG', 2);
205/** GeSHi could not open a file for reading (generally a language file) */
206define('GESHI_ERROR_FILE_NOT_READABLE', 3);
207/** The header type passed to {@link GeSHi->set_header_type()} was invalid */
208define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
209/** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
210define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
211/**#@-*/
212
213
214/**
215 * The GeSHi Class.
216 *
217 * Please refer to the documentation for GeSHi 1.0.X that is available
218 * at http://qbnz.com/highlighter/documentation.php for more information
219 * about how to use this class.
220 *
221 * @package   geshi
222 * @author    Nigel McNie <nigel@geshi.org>
223 * @author    Benny Baumann <BenBE@omorphia.de>
224 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2014 Benny Baumann
225 */
226class GeSHi {
227
228    /**
229     * The source code to highlight
230     * @var string
231     */
232    protected $source = '';
233
234    /**
235     * The language to use when highlighting
236     * @var string
237     */
238    protected $language = '';
239
240    /**
241     * The data for the language used
242     * @var array
243     */
244    protected $language_data = array();
245
246    /**
247     * The path to the language files
248     * @var string
249     */
250    protected $language_path = GESHI_LANG_ROOT;
251
252    /**
     * The last error: false when no error has occurred, otherwise one of the
     * GESHI_ERROR_* codes or a free-form error message
     * @var int|string|false
255     * @todo check err reporting works
256     */
257    protected $error = false;
258
259    /**
260     * Possible error messages
261     * @var array
262     */
263    protected $error_messages = array(
264        GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
265        GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
266        GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
267        GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
268    );
269
270    /**
271     * Whether highlighting is strict or not
272     * @var boolean
273     */
274    protected $strict_mode = false;
275
276    /**
277     * Whether to use CSS classes in output
278     * @var boolean
279     */
280    protected $use_classes = false;
281
282    /**
283     * The type of header to use. Can be one of the following
284     * values:
285     *
286     * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
287     * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
288     * - GESHI_HEADER_NONE: No header is outputted.
289     *
290     * @var int
291     */
292    protected $header_type = GESHI_HEADER_PRE;
293
294    /**
295     * Array of permissions for which lexics should be highlighted
296     * @var array
297     */
298    protected $lexic_permissions = array(
299        'KEYWORDS' =>    array(),
300        'COMMENTS' =>    array('MULTI' => true),
301        'REGEXPS' =>     array(),
302        'ESCAPE_CHAR' => true,
303        'BRACKETS' =>    true,
304        'SYMBOLS' =>     false,
305        'STRINGS' =>     true,
306        'NUMBERS' =>     true,
307        'METHODS' =>     true,
308        'SCRIPT' =>      true
309    );
310
311    /**
312     * The time it took to parse the code
313     * @var double
314     */
315    protected $time = 0;
316
317    /**
318     * The content of the header block
319     * @var string
320     */
321    protected $header_content = '';
322
323    /**
324     * The content of the footer block
325     * @var string
326     */
327    protected $footer_content = '';
328
329    /**
330     * The style of the header block
331     * @var string
332     */
333    protected $header_content_style = '';
334
335    /**
336     * The style of the footer block
337     * @var string
338     */
339    protected $footer_content_style = '';
340
341    /**
342     * Tells if a block around the highlighted source should be forced
343     * if not using line numbering
344     * @var boolean
345     */
346    protected $force_code_block = false;
347
348    /**
349     * The styles for hyperlinks in the code
350     * @var array
351     */
352    protected $link_styles = array();
353
354    /**
355     * Whether important blocks should be recognised or not
356     * @var boolean
357     * @deprecated
358     * @todo REMOVE THIS FUNCTIONALITY!
359     */
360    protected $enable_important_blocks = false;
361
362    /**
363     * Styles for important parts of the code
364     * @var string
365     * @deprecated
366     * @todo As above - rethink the whole idea of important blocks as it is buggy and
367     * will be hard to implement in 1.2
368     */
369    protected $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
370
371    /**
372     * Whether CSS IDs should be added to the code
373     * @var boolean
374     */
375    protected $add_ids = false;
376
377    /**
378     * Lines that should be highlighted extra
379     * @var array
380     */
381    protected $highlight_extra_lines = array();
382
383    /**
384     * Styles of lines that should be highlighted extra
385     * @var array
386     */
387    protected $highlight_extra_lines_styles = array();
388
389    /**
390     * Styles of extra-highlighted lines
391     * @var string
392     */
393    protected $highlight_extra_lines_style = 'background-color: #ffc;';
394
395    /**
396     * The line ending
397     * If null, nl2br() will be used on the result string.
398     * Otherwise, all instances of \n will be replaced with $line_ending
399     * @var string
400     */
401    protected $line_ending = null;
402
403    /**
404     * Number at which line numbers should start at
405     * @var int
406     */
407    protected $line_numbers_start = 1;
408
409    /**
410     * The overall style for this code block
411     * @var string
412     */
413    protected $overall_style = 'font-family:monospace;';
414
415    /**
416     *  The style for the actual code
417     * @var string
418     */
419    protected $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
420
421    /**
422     * The overall class for this code block
423     * @var string
424     */
425    protected $overall_class = '';
426
427    /**
428     * The overall ID for this code block
429     * @var string
430     */
431    protected $overall_id = '';
432
433    /**
434     * Line number styles
435     * @var string
436     */
437    protected $line_style1 = 'font-weight: normal; vertical-align:top;';
438
439    /**
440     * Line number styles for fancy lines
441     * @var string
442     */
443    protected $line_style2 = 'font-weight: bold; vertical-align:top;';
444
445    /**
446     * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
447     * @var string
448     */
449    protected $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
450
451    /**
452     * Flag for how line numbers are displayed
     * @var int One of the GESHI_*_LINE_NUMBERS constants
454     */
455    protected $line_numbers = GESHI_NO_LINE_NUMBERS;
456
457    /**
458     * Flag to decide if multi line spans are allowed. Set it to false to make sure
459     * each tag is closed before and reopened after each linefeed.
460     * @var boolean
461     */
462    protected $allow_multiline_span = true;
463
464    /**
465     * The "nth" value for fancy line highlighting
466     * @var int
467     */
468    protected $line_nth_row = 0;
469
470    /**
471     * The size of tab stops
472     * @var int
473     */
474    protected $tab_width = 8;
475
476    /**
477     * Should we use language-defined tab stop widths?
     * @var boolean
479     */
480    protected $use_language_tab_width = false;
481
482    /**
483     * Default target for keyword links
484     * @var string
485     */
486    protected $link_target = '';
487
488    /**
489     * The encoding to use for entity encoding
490     * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
491     * @var string
492     */
493    protected $encoding = 'utf-8';
494
495    /**
496     * Should keywords be linked?
497     * @var boolean
498     */
499    protected $keyword_links = true;
500
501    /**
502     * Currently loaded language file
503     * @var    string
504     * @since 1.0.7.22
505     */
506    protected $loaded_language = '';
507
508    /**
     * Whether the caches needed for parsing are built or not
510     *
511     * @var   bool
512     * @since 1.0.8
513     */
514    protected $parse_cache_built = false;
515
516    /**
517     * Work around for Suhosin Patch with disabled /e modifier
518     *
519     * Note from suhosins author in config file:
520     * <blockquote>
521     *   The /e modifier inside <code>preg_replace()</code> allows code execution.
522     *   Often it is the cause for remote code execution exploits. It is wise to
523     *   deactivate this feature and test where in the application it is used.
524     *   The developer using the /e modifier should be made aware that he should
525     *   use <code>preg_replace_callback()</code> instead
526     * </blockquote>
527     *
     * @var   int
529     * @since 1.0.8
530     */
531    protected $_kw_replace_group = 0;
532    protected $_rx_key = 0;
533
534    /**
535     * some "callback parameters" for handle_multiline_regexps
536     *
537     * @since  1.0.8
538     * @access private
539     * @var    string
540     */
541    protected $_hmr_before = '';
542    protected $_hmr_replace = '';
543    protected $_hmr_after = '';
544    protected $_hmr_key = 0;
545
546    /**
547     * Creates a new GeSHi object, with source and language
548     *
549     * @param string $source   The source code to highlight
550     * @param string $language The language to highlight the source with
551     * @param string $path     The path to the language file directory. <b>This
552     *               is deprecated!</b> I've backported the auto path
553     *               detection from the 1.1.X dev branch, so now it
554     *               should be automatically set correctly. If you have
555     *               renamed the language directory however, you will
556     *               still need to set the path using this parameter or
557     *               {@link GeSHi->set_language_path()}
558     * @since 1.0.0
559     */
560    public function __construct($source = '', $language = '', $path = '') {
561        if ( is_string($source) && ($source !== '') ) {
562            $this->set_source($source);
563        }
564        if ( is_string($language) && ($language !== '') ) {
565            $this->set_language($language);
566        }
567        $this->set_language_path($path);
568    }
569
570    /**
571     * Returns the version of GeSHi
572     *
573     * @return string
574     * @since  1.0.8.11
575     */
576    public function get_version()
577    {
578        return GESHI_VERSION;
579    }
580
581    /**
582     * Returns an error message associated with the last GeSHi operation,
583     * or false if no error has occurred
584     *
585     * @return string|false An error message if there has been an error, else false
586     * @since  1.0.0
587     */
588    public function error() {
589        if ($this->error) {
590            //Put some template variables for debugging here ...
591            $debug_tpl_vars = array(
592                '{LANGUAGE}' => $this->language,
593                '{PATH}' => $this->language_path
594            );
595            $msg = str_replace(
596                array_keys($debug_tpl_vars),
597                array_values($debug_tpl_vars),
598                $this->error_messages[$this->error]);
599
600            return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
601        }
602        return false;
603    }
604
605    /**
606     * Gets a human-readable language name (thanks to Simon Patterson
607     * for the idea :))
608     *
609     * @return string The name for the current language
610     * @since  1.0.2
611     */
612    public function get_language_name() {
613        if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
614            return $this->language_data['LANG_NAME'] . ' (Unknown Language)';
615        }
616        return $this->language_data['LANG_NAME'];
617    }
618
619    /**
620     * Sets the source code for this object
621     *
622     * @param string $source The source code to highlight
623     * @since 1.0.0
624     */
625    public function set_source($source) {
626        $this->source = $source;
627        $this->highlight_extra_lines = array();
628    }
629
630    /**
631     * Clean up the language name to prevent malicious code injection
632     *
633     * @param string $language The name of the language to strip
634     * @since 1.0.9.1
635     */
636    public function strip_language_name($language) {
637        $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
638        $language = strtolower($language);
639
640        return $language;
641    }
642
643    /**
644     * Sets the language for this object
645     *
646     * @note since 1.0.8 this function won't reset language-settings by default anymore!
647     *       if you need this set $force_reset = true
648     *
649     * @param string $language    The name of the language to use
650     * @param bool   $force_reset
651     * @since 1.0.0
652     */
653    public function set_language($language, $force_reset = false) {
654        $this->error = false;
655        $this->strict_mode = GESHI_NEVER;
656
657        if ($force_reset) {
658            $this->loaded_language = false;
659        }
660
661        //Clean up the language name to prevent malicious code injection
662        $language = $this->strip_language_name($language);
663
664        //Retreive the full filename
665        $file_name = $this->language_path . $language . '.php';
666        if ($file_name == $this->loaded_language) {
667            // this language is already loaded!
668            return;
669        }
670
671        $this->language = $language;
672
673        //Check if we can read the desired file
674        if (!is_readable($file_name)) {
675            $this->error = GESHI_ERROR_NO_SUCH_LANG;
676            return;
677        }
678
679        // Load the language for parsing
680        $this->load_language($file_name);
681    }
682
683    /**
684     * Sets the path to the directory containing the language files. Note
685     * that this path is relative to the directory of the script that included
686     * geshi.php, NOT geshi.php itself.
687     *
688     * @param string $path The path to the language directory
689     * @since 1.0.0
690     * @deprecated The path to the language files should now be automatically
691     *             detected, so this method should no longer be needed. The
692     *             1.1.X branch handles manual setting of the path differently
693     *             so this method will disappear in 1.2.0.
694     */
695    public function set_language_path($path) {
696        if(strpos($path,':')) {
697            //Security Fix to prevent external directories using fopen wrappers.
698            if(DIRECTORY_SEPARATOR == "\\") {
699                if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
700                    return;
701                }
702            } else {
703                return;
704            }
705        }
706        if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
707            //Security Fix to prevent external directories using fopen wrappers.
708            return;
709        }
710        if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
711            //Security Fix to prevent external directories using fopen wrappers.
712            return;
713        }
714        if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
715            //Security Fix to prevent external directories using fopen wrappers.
716            return;
717        }
718        if ($path) {
719            $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
720            $this->set_language($this->language); // otherwise set_language_path has no effect
721        }
722    }
723
724    /**
725     * Get supported langs or an associative array lang=>full_name.
726     * @param boolean $full_names
727     * @return array
728     */
729    public function get_supported_languages($full_names=false)
730    {
731        // return array
732        $back = array();
733
734        // we walk the lang root
735        $dir = dir($this->language_path);
736
737        // foreach entry
738        while (false !== ($entry = $dir->read()))
739        {
740            $full_path = $this->language_path.$entry;
741
742            // Skip all dirs
743            if (is_dir($full_path)) {
744                continue;
745            }
746
747            // we only want lang.php files
748            if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
749                continue;
750            }
751
752            // Raw lang name is here
753            $langname = $matches[1];
754
755            // We want the fullname too?
756            if ($full_names === true)
757            {
758                if (false !== ($fullname = $this->get_language_fullname($langname)))
759                {
760                    $back[$langname] = $fullname; // we go associative
761                }
762            }
763            else
764            {
765                // just store raw langname
766                $back[] = $langname;
767            }
768        }
769
770        $dir->close();
771
772        return $back;
773    }
774
775    /**
776     * Get full_name for a lang or false.
777     * @param string $language short langname (html4strict for example)
778     * @return mixed
779     */
780    public function get_language_fullname($language)
781    {
782        //Clean up the language name to prevent malicious code injection
783        $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
784
785        $language = strtolower($language);
786
787        // get fullpath-filename for a langname
788        $fullpath = $this->language_path.$language.'.php';
789
790        // we need to get contents :S
791        if (false === ($data = file_get_contents($fullpath))) {
792            $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
793            return false;
794        }
795
796        // match the langname
797        if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\']|\\\')+?)\'/', $data, $matches)) {
798            $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
799            return false;
800        }
801
802        // return fullname for langname
803        return stripcslashes($matches[1]);
804    }
805
806    /**
807     * Sets the type of header to be used.
808     *
809     * If GESHI_HEADER_DIV is used, the code is surrounded in a "div".This
810     * means more source code but more control over tab width and line-wrapping.
811     * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
812     * control. Default is GESHI_HEADER_PRE.
813     *
814     * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
815     * should be outputted.
816     *
817     * @param int $type The type of header to be used
818     * @since 1.0.0
819     */
820    public function set_header_type($type) {
821        //Check if we got a valid header type
822        if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
823            GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) {
824            $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
825            return;
826        }
827
828        //Set that new header type
829        $this->header_type = $type;
830    }
831
832    /**
833     * Sets the styles for the code that will be outputted
834     * when this object is parsed. The style should be a
835     * string of valid stylesheet declarations
836     *
837     * @param string  $style             The overall style for the outputted code block
838     * @param boolean $preserve_defaults Whether to merge the styles with the current styles or not
839     * @since 1.0.0
840     */
841    public function set_overall_style($style, $preserve_defaults = false) {
842        if (!$preserve_defaults) {
843            $this->overall_style = $style;
844        } else {
845            $this->overall_style .= $style;
846        }
847    }
848
849    /**
850     * Sets the overall classname for this block of code. This
851     * class can then be used in a stylesheet to style this object's
852     * output
853     *
854     * @param string $class The class name to use for this block of code
855     * @since 1.0.0
856     */
857    public function set_overall_class($class) {
858        $this->overall_class = $class;
859    }
860
861    /**
862     * Sets the overall id for this block of code. This id can then
863     * be used in a stylesheet to style this object's output
864     *
865     * @param string $id The ID to use for this block of code
866     * @since 1.0.0
867     */
868    public function set_overall_id($id) {
869        $this->overall_id = $id;
870    }
871
872    /**
873     * Sets whether CSS classes should be used to highlight the source. Default
874     * is off, calling this method with no arguments will turn it on
875     *
876     * @param boolean $flag Whether to turn classes on or not
877     * @since 1.0.0
878     */
879    public function enable_classes($flag = true) {
880        $this->use_classes = ($flag) ? true : false;
881    }
882
883    /**
884     * Sets the style for the actual code. This should be a string
885     * containing valid stylesheet declarations. If $preserve_defaults is
886     * true, then styles are merged with the default styles, with the
887     * user defined styles having priority
888     *
889     * Note: Use this method to override any style changes you made to
890     * the line numbers if you are using line numbers, else the line of
891     * code will have the same style as the line number! Consult the
892     * GeSHi documentation for more information about this.
893     *
894     * @param string  $style             The style to use for actual code
895     * @param boolean $preserve_defaults Whether to merge the current styles with the new styles
896     * @since 1.0.2
897     */
898    public function set_code_style($style, $preserve_defaults = false) {
899        if (!$preserve_defaults) {
900            $this->code_style = $style;
901        } else {
902            $this->code_style .= $style;
903        }
904    }
905
906    /**
907     * Sets the styles for the line numbers.
908     *
909     * @param string         $style1 The style for the line numbers that are "normal"
910     * @param string|boolean $style2 If a string, this is the style of the line
911     *        numbers that are "fancy", otherwise if boolean then this
912     *        defines whether the normal styles should be merged with the
913     *        new normal styles or not
914     * @param boolean        $preserve_defaults If set, is the flag for whether to merge the "fancy"
915     *        styles with the current styles or not
916     * @since 1.0.2
917     */
918    public function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
919        //Check if we got 2 or three parameters
920        if (is_bool($style2)) {
921            $preserve_defaults = $style2;
922            $style2 = '';
923        }
924
925        //Actually set the new styles
926        if (!$preserve_defaults) {
927            $this->line_style1 = $style1;
928            $this->line_style2 = $style2;
929        } else {
930            $this->line_style1 .= $style1;
931            $this->line_style2 .= $style2;
932        }
933    }
934
935    /**
936     * Sets whether line numbers should be displayed.
937     *
938     * Valid values for the first parameter are:
939     *
940     *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
941     *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
942     *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
943     *
944     * For fancy line numbers, the second parameter is used to signal which lines
945     * are to be fancy. For example, if the value of this parameter is 5 then every
946     * 5th line will be fancy.
947     *
948     * @param int $flag    How line numbers should be displayed
949     * @param int $nth_row Defines which lines are fancy
950     * @since 1.0.0
951     */
952    public function enable_line_numbers($flag, $nth_row = 5) {
953        if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
954            && GESHI_FANCY_LINE_NUMBERS != $flag) {
955            $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
956        }
957        $this->line_numbers = $flag;
958        $this->line_nth_row = $nth_row;
959    }
960
961    /**
     * Sets whether spans and other HTML markup generated by GeSHi can
963     * span over multiple lines or not. Defaults to true to reduce overhead.
964     * Set it to false if you want to manipulate the output or manually display
965     * the code in an ordered list.
966     *
     * @param boolean $flag Whether multiline spans are allowed or not
968     * @since 1.0.7.22
969     */
970    public function enable_multiline_span($flag) {
971        $this->allow_multiline_span = (bool) $flag;
972    }
973
974    /**
975     * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
976     *
977     * @see enable_multiline_span
978     * @return bool
979     */
980    public function get_multiline_span() {
981        return $this->allow_multiline_span;
982    }
983
984    /**
985     * Sets the style for a keyword group. If $preserve_defaults is
986     * true, then styles are merged with the default styles, with the
987     * user defined styles having priority
988     *
989     * @param int     $key               The key of the keyword group to change the styles of
990     * @param string  $style             The style to make the keywords
991     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
992     *                to overwrite them
993     * @since 1.0.0
994     */
995    public function set_keyword_group_style($key, $style, $preserve_defaults = false) {
996        //Set the style for this keyword group
997        if('*' == $key) {
998            foreach($this->language_data['STYLES']['KEYWORDS'] as $_key => $_value) {
999                if (!$preserve_defaults) {
1000                    $this->language_data['STYLES']['KEYWORDS'][$_key] = $style;
1001                } else {
1002                    $this->language_data['STYLES']['KEYWORDS'][$_key] .= $style;
1003                }
1004            }
1005        } else {
1006            if (!$preserve_defaults) {
1007                $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
1008            } else {
1009                $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
1010            }
1011        }
1012
1013        //Update the lexic permissions
1014        if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
1015            $this->lexic_permissions['KEYWORDS'][$key] = true;
1016        }
1017    }
1018
1019    /**
1020     * Turns highlighting on/off for a keyword group
1021     *
1022     * @param int     $key  The key of the keyword group to turn on or off
1023     * @param boolean $flag Whether to turn highlighting for that group on or off
1024     * @since 1.0.0
1025     */
1026    public function set_keyword_group_highlighting($key, $flag = true) {
1027        $this->lexic_permissions['KEYWORDS'][$key] = ($flag) ? true : false;
1028    }
1029
1030    /**
1031     * Sets the styles for comment groups.  If $preserve_defaults is
1032     * true, then styles are merged with the default styles, with the
1033     * user defined styles having priority
1034     *
1035     * @param int     $key               The key of the comment group to change the styles of
1036     * @param string  $style             The style to make the comments
1037     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1038     *                to overwrite them
1039     * @since 1.0.0
1040     */
1041    public function set_comments_style($key, $style, $preserve_defaults = false) {
1042        if('*' == $key) {
1043            foreach($this->language_data['STYLES']['COMMENTS'] as $_key => $_value) {
1044                if (!$preserve_defaults) {
1045                    $this->language_data['STYLES']['COMMENTS'][$_key] = $style;
1046                } else {
1047                    $this->language_data['STYLES']['COMMENTS'][$_key] .= $style;
1048                }
1049            }
1050        } else {
1051            if (!$preserve_defaults) {
1052                $this->language_data['STYLES']['COMMENTS'][$key] = $style;
1053            } else {
1054                $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
1055            }
1056        }
1057    }
1058
1059    /**
1060     * Turns highlighting on/off for comment groups
1061     *
1062     * @param int     $key  The key of the comment group to turn on or off
1063     * @param boolean $flag Whether to turn highlighting for that group on or off
1064     * @since 1.0.0
1065     */
1066    public function set_comments_highlighting($key, $flag = true) {
1067        $this->lexic_permissions['COMMENTS'][$key] = ($flag) ? true : false;
1068    }
1069
1070    /**
1071     * Sets the styles for escaped characters. If $preserve_defaults is
1072     * true, then styles are merged with the default styles, with the
1073     * user defined styles having priority
1074     *
1075     * @param string  $style             The style to make the escape characters
1076     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1077     *                                   to overwrite them
1078     * @param int     $group             Tells the group of symbols for which style should be set.
1079     * @since 1.0.0
1080     */
1081    public function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
1082        if (!$preserve_defaults) {
1083            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
1084        } else {
1085            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
1086        }
1087    }
1088
1089    /**
1090     * Turns highlighting on/off for escaped characters
1091     *
1092     * @param boolean $flag Whether to turn highlighting for escape characters on or off
1093     * @since 1.0.0
1094     */
1095    public function set_escape_characters_highlighting($flag = true) {
1096        $this->lexic_permissions['ESCAPE_CHAR'] = ($flag) ? true : false;
1097    }
1098
1099    /**
1100     * Sets the styles for brackets. If $preserve_defaults is
1101     * true, then styles are merged with the default styles, with the
1102     * user defined styles having priority
1103     *
1104     * This method is DEPRECATED: use set_symbols_style instead.
1105     * This method will be removed in 1.2.X
1106     *
1107     * @param string  $style             The style to make the brackets
1108     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1109     *                to overwrite them
1110     * @since 1.0.0
1111     * @deprecated In favour of set_symbols_style
1112     */
1113    public function set_brackets_style($style, $preserve_defaults = false) {
1114        if (!$preserve_defaults) {
1115            $this->language_data['STYLES']['BRACKETS'][0] = $style;
1116        } else {
1117            $this->language_data['STYLES']['BRACKETS'][0] .= $style;
1118        }
1119    }
1120
1121    /**
1122     * Turns highlighting on/off for brackets
1123     *
1124     * This method is DEPRECATED: use set_symbols_highlighting instead.
     * This method will be removed in 1.2.X
1126     *
1127     * @param boolean $flag Whether to turn highlighting for brackets on or off
1128     * @since 1.0.0
1129     * @deprecated In favour of set_symbols_highlighting
1130     */
1131    public function set_brackets_highlighting($flag) {
1132        $this->lexic_permissions['BRACKETS'] = ($flag) ? true : false;
1133    }
1134
1135    /**
1136     * Sets the styles for symbols. If $preserve_defaults is
1137     * true, then styles are merged with the default styles, with the
1138     * user defined styles having priority
1139     *
1140     * @param string  $style             The style to make the symbols
1141     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1142     *                                   to overwrite them
1143     * @param int     $group             Tells the group of symbols for which style should be set.
1144     * @since 1.0.1
1145     */
1146    public function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
1147        // Update the style of symbols
1148        if (!$preserve_defaults) {
1149            $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
1150        } else {
1151            $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
1152        }
1153
1154        // For backward compatibility
1155        if (0 == $group) {
1156            $this->set_brackets_style ($style, $preserve_defaults);
1157        }
1158    }
1159
1160    /**
1161     * Turns highlighting on/off for symbols
1162     *
1163     * @param boolean $flag Whether to turn highlighting for symbols on or off
1164     * @since 1.0.0
1165     */
1166    public function set_symbols_highlighting($flag) {
1167        // Update lexic permissions for this symbol group
1168        $this->lexic_permissions['SYMBOLS'] = ($flag) ? true : false;
1169
1170        // For backward compatibility
1171        $this->set_brackets_highlighting ($flag);
1172    }
1173
1174    /**
1175     * Sets the styles for strings. If $preserve_defaults is
1176     * true, then styles are merged with the default styles, with the
1177     * user defined styles having priority
1178     *
1179     * @param string  $style             The style to make the escape characters
1180     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1181     *                                   to overwrite them
1182     * @param int     $group             Tells the group of strings for which style should be set.
1183     * @since 1.0.0
1184     */
1185    public function set_strings_style($style, $preserve_defaults = false, $group = 0) {
1186        if (!$preserve_defaults) {
1187            $this->language_data['STYLES']['STRINGS'][$group] = $style;
1188        } else {
1189            $this->language_data['STYLES']['STRINGS'][$group] .= $style;
1190        }
1191    }
1192
1193    /**
1194     * Turns highlighting on/off for strings
1195     *
1196     * @param boolean $flag Whether to turn highlighting for strings on or off
1197     * @since 1.0.0
1198     */
1199    public function set_strings_highlighting($flag) {
1200        $this->lexic_permissions['STRINGS'] = ($flag) ? true : false;
1201    }
1202
1203    /**
1204     * Sets the styles for strict code blocks. If $preserve_defaults is
1205     * true, then styles are merged with the default styles, with the
1206     * user defined styles having priority
1207     *
1208     * @param string  $style             The style to make the script blocks
1209     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1210     *                                   to overwrite them
1211     * @param int     $group             Tells the group of script blocks for which style should be set.
1212     * @since 1.0.8.4
1213     */
1214    public function set_script_style($style, $preserve_defaults = false, $group = 0) {
1215        // Update the style of symbols
1216        if (!$preserve_defaults) {
1217            $this->language_data['STYLES']['SCRIPT'][$group] = $style;
1218        } else {
1219            $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
1220        }
1221    }
1222
1223    /**
1224     * Sets the styles for numbers. If $preserve_defaults is
1225     * true, then styles are merged with the default styles, with the
1226     * user defined styles having priority
1227     *
1228     * @param string  $style             The style to make the numbers
1229     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1230     *                                   to overwrite them
1231     * @param int     $group             Tells the group of numbers for which style should be set.
1232     * @since 1.0.0
1233     */
1234    public function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
1235        if (!$preserve_defaults) {
1236            $this->language_data['STYLES']['NUMBERS'][$group] = $style;
1237        } else {
1238            $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
1239        }
1240    }
1241
1242    /**
1243     * Turns highlighting on/off for numbers
1244     *
1245     * @param boolean $flag Whether to turn highlighting for numbers on or off
1246     * @since 1.0.0
1247     */
1248    public function set_numbers_highlighting($flag) {
1249        $this->lexic_permissions['NUMBERS'] = ($flag) ? true : false;
1250    }
1251
1252    /**
1253     * Sets the styles for methods. $key is a number that references the
1254     * appropriate "object splitter" - see the language file for the language
1255     * you are highlighting to get this number. If $preserve_defaults is
1256     * true, then styles are merged with the default styles, with the
1257     * user defined styles having priority
1258     *
1259     * @param int     $key               The key of the object splitter to change the styles of
1260     * @param string  $style             The style to make the methods
1261     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1262     *                                   to overwrite them
1263     * @since 1.0.0
1264     */
1265    public function set_methods_style($key, $style, $preserve_defaults = false) {
1266        if (!$preserve_defaults) {
1267            $this->language_data['STYLES']['METHODS'][$key] = $style;
1268        } else {
1269            $this->language_data['STYLES']['METHODS'][$key] .= $style;
1270        }
1271    }
1272
1273    /**
1274     * Turns highlighting on/off for methods
1275     *
1276     * @param boolean $flag Whether to turn highlighting for methods on or off
1277     * @since 1.0.0
1278     */
1279    public function set_methods_highlighting($flag) {
1280        $this->lexic_permissions['METHODS'] = ($flag) ? true : false;
1281    }
1282
1283    /**
1284     * Sets the styles for regexps. If $preserve_defaults is
1285     * true, then styles are merged with the default styles, with the
1286     * user defined styles having priority
1287     *
     * @param int     $key               The key of the regular expression group to change the styles of
     * @param string  $style             The style to make the regular expression matches
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                   to overwrite them
1293     * @since 1.0.0
1294     */
1295    public function set_regexps_style($key, $style, $preserve_defaults = false) {
1296        if (!$preserve_defaults) {
1297            $this->language_data['STYLES']['REGEXPS'][$key] = $style;
1298        } else {
1299            $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
1300        }
1301    }
1302
1303    /**
1304     * Turns highlighting on/off for regexps
1305     *
1306     * @param int     $key  The key of the regular expression group to turn on or off
1307     * @param boolean $flag Whether to turn highlighting for the regular expression group on or off
1308     * @since 1.0.0
1309     */
1310    public function set_regexps_highlighting($key, $flag) {
1311        $this->lexic_permissions['REGEXPS'][$key] = ($flag) ? true : false;
1312    }
1313
1314    /**
1315     * Sets whether a set of keywords are checked for in a case sensitive manner
1316     *
1317     * @param int     $key  The key of the keyword group to change the case sensitivity of
1318     * @param boolean $case Whether to check in a case sensitive manner or not
1319     * @since 1.0.0
1320     */
1321    public function set_case_sensitivity($key, $case) {
1322        $this->language_data['CASE_SENSITIVE'][$key] = ($case) ? true : false;
1323    }
1324
1325    /**
1326     * Sets the case that keywords should use when found. Use the constants:
1327     *
1328     *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
1329     *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
1330     *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
1331     *
1332     * @param int $case A constant specifying what to do with matched keywords
1333     * @since 1.0.1
1334     */
1335    public function set_case_keywords($case) {
1336        if (in_array($case, array(
1337            GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER))) {
1338            $this->language_data['CASE_KEYWORDS'] = $case;
1339        }
1340    }
1341
1342    /**
1343     * Sets how many spaces a tab is substituted for
1344     *
     * Widths below one are replaced by the default width of 8
1346     *
1347     * @param int $width The tab width
1348     * @since 1.0.0
1349     */
1350    public function set_tab_width($width) {
1351        $this->tab_width = intval($width);
1352
1353        //Check if it fit's the constraints:
1354        if ($this->tab_width < 1) {
1355            //Return it to the default
1356            $this->tab_width = 8;
1357        }
1358    }
1359
1360    /**
     * Sets whether or not to use tab-stop width specified by language
1362     *
1363     * @param boolean $use Whether to use language-specific tab-stop widths
1364     * @since 1.0.7.20
1365     */
1366    public function set_use_language_tab_width($use) {
1367        $this->use_language_tab_width = (bool) $use;
1368    }
1369
1370    /**
1371     * Returns the tab width to use, based on the current language and user
1372     * preference
1373     *
1374     * @return int Tab width
1375     * @since 1.0.7.20
1376     */
1377    public function get_real_tab_width() {
1378        if (!$this->use_language_tab_width ||
1379            !isset($this->language_data['TAB_WIDTH'])) {
1380            return $this->tab_width;
1381        } else {
1382            return $this->language_data['TAB_WIDTH'];
1383        }
1384    }
1385
1386    /**
1387     * Enables/disables strict highlighting. Default is off, calling this
1388     * method without parameters will turn it on. See documentation
1389     * for more details on strict mode and where to use it.
1390     *
1391     * @param boolean $mode Whether to enable strict mode or not
1392     * @since 1.0.0
1393     */
1394    public function enable_strict_mode($mode = true) {
1395        if (GESHI_MAYBE == $this->language_data['STRICT_MODE_APPLIES']) {
1396            $this->strict_mode = ($mode) ? GESHI_ALWAYS : GESHI_NEVER;
1397        }
1398    }
1399
1400    /**
1401     * Disables all highlighting
1402     *
1403     * @since 1.0.0
1404     * @todo  Rewrite with array traversal
1405     * @deprecated In favour of enable_highlighting
1406     */
1407    public function disable_highlighting() {
1408        $this->enable_highlighting(false);
1409    }
1410
1411    /**
1412     * Enables all highlighting
1413     *
1414     * The optional flag parameter was added in version 1.0.7.21 and can be used
1415     * to enable (true) or disable (false) all highlighting.
1416     *
1417     * @since 1.0.0
1418     * @param boolean $flag A flag specifying whether to enable or disable all highlighting
1419     * @todo  Rewrite with array traversal
1420     */
1421    public function enable_highlighting($flag = true) {
1422        $flag = $flag ? true : false;
1423        foreach ($this->lexic_permissions as $key => $value) {
1424            if (is_array($value)) {
1425                foreach ($value as $k => $v) {
1426                    $this->lexic_permissions[$key][$k] = $flag;
1427                }
1428            } else {
1429                $this->lexic_permissions[$key] = $flag;
1430            }
1431        }
1432
1433        // Context blocks
1434        $this->enable_important_blocks = $flag;
1435    }
1436
1437    /**
     * Given a file extension, this method returns either a valid geshi language
     * name, or 'text' if no language is registered for the extension
1440     *
1441     * @param string $extension The extension to get a language name for
1442     * @param array  $lookup    A lookup array to use instead of the default one
1443     * @since 1.0.5
1444     * @todo Re-think about how this method works (maybe make it private and/or make it
1445     *       a extension->lang lookup?)
1446     * @return int|string
1447     */
1448    public static function get_language_name_from_extension( $extension, $lookup = array() ) {
1449        $extension = strtolower($extension);
1450
1451        if ( !is_array($lookup) || empty($lookup)) {
1452            $lookup = array(
1453                '6502acme' => array( 'a', 's', 'asm', 'inc' ),
1454                '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
1455                '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
1456                '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
1457                'abap' => array('abap'),
1458                'actionscript' => array('as'),
1459                'ada' => array('a', 'ada', 'adb', 'ads'),
1460                'apache' => array('conf'),
1461                'asm' => array('ash', 'asm', 'inc'),
1462                'asp' => array('asp'),
1463                'bash' => array('sh'),
1464                'bf' => array('bf'),
1465                'c' => array('c', 'h'),
1466                'c_mac' => array('c', 'h'),
1467                'caddcl' => array(),
1468                'cadlisp' => array(),
1469                'cdfg' => array('cdfg'),
1470                'cobol' => array('cbl'),
1471                'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
1472                'csharp' => array('cs'),
1473                'css' => array('css'),
1474                'd' => array('d'),
1475                'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
1476                'diff' => array('diff', 'patch'),
1477                'dos' => array('bat', 'cmd'),
1478                'gdb' => array('kcrash', 'crash', 'bt'),
1479                'gettext' => array('po', 'pot'),
1480                'gml' => array('gml'),
1481                'gnuplot' => array('plt'),
1482                'groovy' => array('groovy'),
1483                'haskell' => array('hs'),
1484                'haxe' => array('hx'),
1485                'html4strict' => array('html', 'htm'),
1486                'ini' => array('ini', 'desktop', 'vbp'),
1487                'java' => array('java'),
1488                'javascript' => array('js'),
1489                'klonec' => array('kl1'),
1490                'klonecpp' => array('klx'),
1491                'latex' => array('tex'),
1492                'lisp' => array('lisp'),
1493                'lua' => array('lua'),
1494                'matlab' => array('m'),
1495                'mpasm' => array(),
1496                'mysql' => array('sql'),
1497                'nsis' => array(),
1498                'objc' => array(),
1499                'oobas' => array(),
1500                'oracle8' => array(),
1501                'oracle10' => array(),
1502                'pascal' => array('pas'),
1503                'perl' => array('pl', 'pm'),
1504                'php' => array('php', 'php5', 'phtml', 'phps'),
1505                'povray' => array('pov'),
1506                'providex' => array('pvc', 'pvx'),
1507                'prolog' => array('pl'),
1508                'python' => array('py'),
1509                'qbasic' => array('bi'),
1510                'reg' => array('reg'),
1511                'ruby' => array('rb'),
1512                'sas' => array('sas'),
1513                'scala' => array('scala'),
1514                'scheme' => array('scm'),
1515                'scilab' => array('sci'),
1516                'smalltalk' => array('st'),
1517                'smarty' => array(),
1518                'tcl' => array('tcl'),
1519                'text' => array('txt'),
1520                'vb' => array('bas', 'ctl', 'frm'),
1521                'vbnet' => array('vb', 'sln'),
1522                'visualfoxpro' => array(),
1523                'whitespace' => array('ws'),
1524                'xml' => array('xml', 'svg', 'xrc', 'vbproj', 'csproj', 'userprefs', 'resx', 'stetic', 'settings', 'manifest', 'myapp'),
1525                'z80' => array('z80', 'asm', 'inc')
1526            );
1527        }
1528
1529        foreach ($lookup as $lang => $extensions) {
1530            if (in_array($extension, $extensions)) {
1531                return $lang;
1532            }
1533        }
1534
1535        return 'text';
1536    }
1537
1538    /**
1539     * Given a file name, this method loads its contents in, and attempts
1540     * to set the language automatically. An optional lookup table can be
1541     * passed for looking up the language name. If not specified a default
1542     * table is used
1543     *
1544     * The language table is in the form
1545     * <pre>array(
1546     *   'lang_name' => array('extension', 'extension', ...),
1547     *   'lang_name' ...
1548     * );</pre>
1549     *
1550     * @param string $file_name The filename to load the source from
1551     * @param array  $lookup    A lookup array to use instead of the default one
1552     * @todo Complete rethink of this and above method
1553     * @since 1.0.5
1554     */
1555    public function load_from_file($file_name, $lookup = array()) {
1556        if (is_readable($file_name)) {
1557            $this->set_source(file_get_contents($file_name));
1558            $this->set_language(self::get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
1559        } else {
1560            $this->error = GESHI_ERROR_FILE_NOT_READABLE;
1561        }
1562    }
1563
1564    /**
1565     * Adds a keyword to a keyword group for highlighting
1566     *
1567     * @param int    $key  The key of the keyword group to add the keyword to
1568     * @param string $word The word to add to the keyword group
1569     * @since 1.0.0
1570     */
1571    public function add_keyword($key, $word) {
1572        if (!is_array($this->language_data['KEYWORDS'][$key])) {
1573            $this->language_data['KEYWORDS'][$key] = array();
1574        }
1575        if (!in_array($word, $this->language_data['KEYWORDS'][$key])) {
1576            $this->language_data['KEYWORDS'][$key][] = $word;
1577
1578            //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
1579            if ($this->parse_cache_built) {
1580                $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
1581                $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
1582            }
1583        }
1584    }
1585
1586    /**
1587     * Removes a keyword from a keyword group
1588     *
1589     * @param int    $key       The key of the keyword group to remove the keyword from
1590     * @param string $word      The word to remove from the keyword group
     * @param bool   $recompile Whether to automatically recompile the optimized regexp list or not.
1592     *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
1593     *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
1594     *               or the removed keyword will stay in cache and still be highlighted! On the other hand
1595     *               it might be too expensive to recompile the regexp list for every removal if you want to
1596     *               remove a lot of keywords.
1597     * @since 1.0.0
1598     */
1599    public function remove_keyword($key, $word, $recompile = true) {
1600        $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
1601        if ($key_to_remove !== false) {
1602            unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);
1603
1604            //NEW in 1.0.8, optionally recompile keyword group
1605            if ($recompile && $this->parse_cache_built) {
1606                $this->optimize_keyword_group($key);
1607            }
1608        }
1609    }
1610
1611    /**
1612     * Creates a new keyword group
1613     *
1614     * @param int     $key            The key of the keyword group to create
1615     * @param string  $styles         The styles for the keyword group
     * @param boolean $case_sensitive Whether the keyword group is case sensitive or not
1617     * @param array   $words          The words to use for the keyword group
1618     * @since 1.0.0
1619     * @return bool
1620     */
1621    public function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
1622        $words = (array) $words;
1623        if  (empty($words)) {
1624            // empty word lists mess up highlighting
1625            return false;
1626        }
1627
1628        //Add the new keyword group internally
1629        $this->language_data['KEYWORDS'][$key] = $words;
1630        $this->lexic_permissions['KEYWORDS'][$key] = true;
1631        $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
1632        $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
1633
1634        //NEW in 1.0.8, cache keyword regexp
1635        if ($this->parse_cache_built) {
1636            $this->optimize_keyword_group($key);
1637        }
1638        return true;
1639    }
1640
/**
 * Drops a keyword group together with everything stored for it
 *
 * @param int $key The key of the keyword group to remove
 * @since 1.0.0
 */
public function remove_keyword_group($key) {
    // Word list, highlighting permission, case setting, style and
    // (since 1.0.8) the cached regexp list all go in one sweep
    unset(
        $this->language_data['KEYWORDS'][$key],
        $this->lexic_permissions['KEYWORDS'][$key],
        $this->language_data['CASE_SENSITIVE'][$key],
        $this->language_data['STYLES']['KEYWORDS'][$key],
        $this->language_data['CACHED_KEYWORD_LISTS'][$key]
    );
}
1657
/**
 * Compiles the words of one keyword group into an optimized regexp list
 * and stores it in the CACHED_KEYWORD_LISTS entry of that group.
 *
 * If SPACE_AS_WHITESPACE is enabled in PARSER_CONTROL (for all keywords,
 * or for this group, the group setting taking precedence), every blank in
 * the compiled expressions is replaced by \s+.
 *
 * @param int $key The key of the keyword group to compile & optimize
 * @since 1.0.8
 */
public function optimize_keyword_group($key) {
    $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
        $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);

    // isset() on a nested path is false as soon as any level is missing,
    // so the intermediate levels need no separate checks
    $flexible_spaces = false;
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
        // setting for all keyword groups
        $flexible_spaces = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
    }
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
        // setting for this group only, overrides the general one
        $flexible_spaces = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
    }

    if (!$flexible_spaces) {
        return;
    }

    foreach ($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $index => $pattern) {
        $this->language_data['CACHED_KEYWORD_LISTS'][$key][$index] =
            str_replace(" ", "\\s+", $pattern);
    }
}
1687
/**
 * Stores the content to use for the header block
 *
 * @param string $content The content of the header block
 * @since 1.0.2
 */
public function set_header_content($content) {
    $this->header_content = $content;
}
1697
/**
 * Stores the content to use for the footer block
 *
 * @param string $content The content of the footer block
 * @since 1.0.2
 */
public function set_footer_content($content) {
    $this->footer_content = $content;
}
1707
/**
 * Stores the style to apply to the header content
 *
 * @param string $style The style for the header content
 * @since 1.0.2
 */
public function set_header_content_style($style) {
    $this->header_content_style = $style;
}
1717
/**
 * Stores the style to apply to the footer content
 *
 * @param string $style The style for the footer content
 * @since 1.0.2
 */
public function set_footer_content_style($style) {
    $this->footer_content_style = $style;
}
1727
/**
 * Switches the forced surrounding block around the highlighted code
 * on or off
 *
 * @param boolean $flag Tells whether to enable or disable this feature;
 *                      any value is reduced to true or false
 * @since 1.0.7.20
 */
public function enable_inner_code_block($flag) {
    $this->force_code_block = $flag ? true : false;
}
1738
/**
 * Stores the base URL that keywords of one group link to
 *
 * @param int    $group The key of the keyword group to set the URL for
 * @param string $url   The URL to set for the group. If {FNAME} is in
 *                      the url somewhere, it is replaced by the keyword
 *                      that the URL is being made for
 * @since 1.0.2
 */
public function set_url_for_keyword_group($group, $url) {
    $this->language_data['URLS'][$group] = $url;
}
1751
/**
 * Stores the styles for one link state of the links in code
 *
 * @param int    $type   A constant that specifies what state the style is being
 *                       set for - e.g. :hover or :visited
 * @param string $styles The styles to use for that state
 * @since 1.0.2
 */
public function set_link_styles($type, $styles) {
    $this->link_styles[$type] = $styles;
}
1763
/**
 * Sets the target for links in code. The value is stored as a ready-made
 * target="..." attribute string; an empty (falsy) target stores an empty
 * string instead.
 *
 * @param string $target The target for links in the code, e.g. _blank
 * @since 1.0.3
 */
public function set_link_target($target) {
    $this->link_target = $target
        ? ' target="' . $target . '"'
        : '';
}
1777
/**
 * Stores the styles used for important parts of the code
 *
 * @param string $styles The styles to use on important parts of the code
 * @since 1.0.2
 */
public function set_important_styles($styles) {
    $this->important_styles = $styles;
}
1787
/**
 * Switches highlighting of context-important blocks on or off
 *
 * @param boolean $flag Tells whether to enable or disable highlighting of
 *                      important blocks; any value is reduced to true or false
 * @todo REMOVE THIS SHIZ FROM GESHI!
 * @deprecated
 * @since 1.0.2
 */
public function enable_important_blocks($flag) {
    $this->enable_important_blocks = (bool) $flag;
}
1799
/**
 * Switches the adding of CSS IDs to each line on or off
 *
 * @param boolean $flag If true (the default), IDs will be added to each line.
 * @since 1.0.2
 */
public function enable_ids($flag = true) {
    $this->add_ids = (bool) $flag;
}
1809
/**
 * Marks lines for extra highlighting, or removes that mark again
 *
 * The extra style parameter was added in 1.0.7.21.
 *
 * @param mixed  $lines An array of line numbers to highlight, or just a line
 *                      number on its own. Arrays are processed entry by
 *                      entry; single values are converted to integers.
 * @param string $style A string specifying the style to use for this line.
 *                      If null is specified, the default style is used
 *                      (a line-specific style is dropped).
 *                      If false is specified, the line will be removed from
 *                      special highlighting
 * @since 1.0.2
 * @todo  Some data replication here that could be cut down on
 */
public function highlight_lines_extra($lines, $style = null) {
    if (is_array($lines)) {
        // Handle every entry on its own with the same style
        foreach ($lines as $entry) {
            $this->highlight_lines_extra($entry, $style);
        }
        return;
    }

    // Single line: register it first, then settle the style question
    $line_no = (int) $lines;
    $this->highlight_extra_lines[$line_no] = $line_no;

    if ($style === false) {
        // Removal requested: forget the line and its style
        unset(
            $this->highlight_extra_lines[$line_no],
            $this->highlight_extra_lines_styles[$line_no]
        );
        return;
    }

    if ($style === null) {
        // Back to the default style
        unset($this->highlight_extra_lines_styles[$line_no]);
        return;
    }

    $this->highlight_extra_lines_styles[$line_no] = $style;
}
1846
/**
 * Stores the style used for extra-highlighted lines
 *
 * @param string $styles The style for extra-highlighted lines
 * @since 1.0.2
 */
public function set_highlight_lines_extra_style($styles) {
    $this->highlight_extra_lines_style = $styles;
}
1856
/**
 * Stores the line-ending to use
 *
 * @param string $line_ending The new line-ending (converted to a string)
 * @since 1.0.2
 */
public function set_line_ending($line_ending) {
    $this->line_ending = strval($line_ending);
}
1866
/**
 * Sets the number the line numbering starts at. The argument is
 * converted to an integer and its sign is dropped.
 *
 * <b>Warning:</b> Using this method will add the "start"
 * attribute to the &lt;ol&gt; that is used for line numbering.
 * This is <b>not</b> valid XHTML strict, so if that's what you
 * care about then don't use this method. Firefox is getting
 * support for the CSS method of doing this in 1.1 and Opera
 * has support for the CSS method, but (of course) IE doesn't
 * so it's not worth doing it the CSS way yet.
 *
 * @param int $number The number to start line numbers at
 * @since 1.0.2
 */
public function start_line_numbers_at($number) {
    $this->line_numbers_start = abs((int) $number);
}
1885
/**
 * Sets the encoding of the source, for international support.
 *
 * The name is stored in lower case. An empty (falsy) value is ignored,
 * so the encoding set before stays in effect.
 *
 * @param string $encoding The encoding to use for the source
 * @since 1.0.3
 */
public function set_encoding($encoding) {
    if (!$encoding) {
        return;
    }

    $this->encoding = strtolower($encoding);
}
1903
/**
 * Switches linking of keywords on or off.
 *
 * @param boolean $enable If true (the default), links will be added to keywords
 * @since 1.0.2
 */
public function enable_keyword_links($enable = true) {
    $this->keyword_links = $enable ? true : false;
}
1913
/**
 * Prepares the style information needed to highlight numbers. Nothing is
 * done when number highlighting is disabled.
 *
 * The NUMBERS entry of the language data is either an array, which is
 * copied to NUMBERS_CACHE as it is, or a bitfield of number flags (an
 * unset or empty value falls back to GESHI_NUMBER_INT_BASIC |
 * GESHI_NUMBER_FLT_NONSCI). For a bitfield:
 *  - number styles stored under a flag value (1 << n) are moved to the
 *    bit position n
 *  - every set flag that has a style of its own gets a NUMBERS_CACHE
 *    entry n => flag
 *  - all remaining set flags are collected in NUMBERS_CACHE[0]
 *
 * @since 1.0.8
 */
protected function build_style_cache() {
    if (!$this->lexic_permissions['NUMBERS']) {
        return;
    }

    // Find out in which form the number highlighting information is given
    if (!isset($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS'] = 0;
    }

    if (is_array($this->language_data['NUMBERS'])) {
        // Already grouped, take it over unchanged
        $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
        return;
    }

    $this->language_data['NUMBERS_CACHE'] = array();
    if (!$this->language_data['NUMBERS']) {
        // No flags at all: use the basic integer and float formats
        $this->language_data['NUMBERS'] =
            GESHI_NUMBER_INT_BASIC |
            GESHI_NUMBER_FLT_NONSCI;
    }

    // Walk the bitfield from the lowest bit upwards until no set bit is left
    $pending = $this->language_data['NUMBERS'];
    $bit = 0;
    while ($pending > 0) {
        $flag = 1 << $bit;

        // Rearrange style indices if required: flag value => bit position
        if (isset($this->language_data['STYLES']['NUMBERS'][$flag])) {
            $this->language_data['STYLES']['NUMBERS'][$bit] =
                $this->language_data['STYLES']['NUMBERS'][$flag];
            unset($this->language_data['STYLES']['NUMBERS'][$flag]);
        }

        // Only flags that are actually set take part in the highlighting
        if ($pending & 1) {
            if (isset($this->language_data['STYLES']['NUMBERS'][$bit])) {
                // The flag has a style of its own, so it forms its own group
                $this->language_data['NUMBERS_CACHE'][$bit] = $flag;
            } else {
                // No own style: the flag joins group 0
                if (!isset($this->language_data['NUMBERS_CACHE'][0])) {
                    $this->language_data['NUMBERS_CACHE'][0] = 0;
                }
                $this->language_data['NUMBERS_CACHE'][0] |= $flag;
            }
        }

        ++$bit;
        $pending >>= 1;
    }
}
1965
/**
 * Builds the caches needed for parsing and stores them in the language
 * data: the symbol lookup table and search expression, the optimized
 * keyword lists, the bracket replacements and the number regexps.
 * parse_code() calls this when the cache has not been built yet; on
 * success the parse cache is flagged as built.
 *
 * @since 1.0.8
 * @return bool|null false if no language data is available, no value otherwise
 */
protected function build_parse_cache() {
    // Without language data there is nothing to cache
    if (empty($this->language_data)) {
        return false;
    }

    // Symbols
    // Building the search expression is costly, so it is done once for all
    // symbols instead of once per symbol group. For this the symbols are
    // reorganized into a table "symbol => style group".
    if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
        $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;

        $this->language_data['SYMBOL_DATA'] = array();
        $multi_char = array();  // quoted symbols of more than one char
        $single_char = array(); // quoted symbols of exactly one char
        foreach ($this->language_data['SYMBOLS'] as $group => $entry) {
            if (is_array($entry)) {
                // a list of symbols sharing the style group of their key
                $style_group = $group;
                $candidates = $entry;
            } else {
                // a lone symbol always belongs to style group 0
                $style_group = 0;
                $candidates = array($entry);
            }

            foreach ($candidates as $symbol) {
                $symbol = $this->hsc($symbol);
                if (isset($this->language_data['SYMBOL_DATA'][$symbol])) {
                    // the first occurrence of a symbol wins
                    continue;
                }
                $this->language_data['SYMBOL_DATA'][$symbol] = $style_group;

                if (isset($symbol[1])) {
                    $multi_char[] = preg_quote($symbol, '/');
                } elseif ($symbol == '-') {
                    // don't trigger range out of order error
                    $single_char[] = '\-';
                } else {
                    $single_char[] = preg_quote($symbol, '/');
                }
            }
        }

        // SYMBOL_DATA now maps each symbol to its style group, so the right
        // style can be looked up at the moment a symbol is highlighted.
        // What is left is the search expression: the multi char symbols as
        // alternatives first, then one character class for the single chars.
        $alternatives = array();
        if (!empty($multi_char)) {
            rsort($multi_char);
            $alternatives[] = implode('|', $multi_char);
        }
        if (!empty($single_char)) {
            rsort($single_char);
            $alternatives[] = '[' . implode('', $single_char) . ']';
        }
        $this->language_data['SYMBOL_SEARCH'] = implode("|", $alternatives);
    }

    // Keywords
    // Start from an empty cache and compile every group that is not
    // explicitly disabled
    $this->language_data['CACHED_KEYWORD_LISTS'] = array();
    foreach (array_keys($this->language_data['KEYWORDS']) as $group) {
        if (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
                !$this->lexic_permissions['KEYWORDS'][$group]) {
            continue;
        }
        $this->optimize_keyword_group($group);
    }

    // Brackets
    if ($this->lexic_permissions['BRACKETS']) {
        $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');
        if ($this->use_classes || !isset($this->language_data['STYLES']['BRACKETS'][0])) {
            // CSS classes are in use, or there is no bracket style to inline
            $this->language_data['CACHE_BRACKET_REPLACE'] = array(
                '<| class="br0">&#91;|>',
                '<| class="br0">&#93;|>',
                '<| class="br0">&#40;|>',
                '<| class="br0">&#41;|>',
                '<| class="br0">&#123;|>',
                '<| class="br0">&#125;|>',
            );
        } else {
            // Inline the bracket style
            $bracket_style = $this->language_data['STYLES']['BRACKETS'][0];
            $this->language_data['CACHE_BRACKET_REPLACE'] = array(
                '<| style="' . $bracket_style . '">&#91;|>',
                '<| style="' . $bracket_style . '">&#93;|>',
                '<| style="' . $bracket_style . '">&#40;|>',
                '<| style="' . $bracket_style . '">&#41;|>',
                '<| style="' . $bracket_style . '">&#123;|>',
                '<| style="' . $bracket_style . '">&#125;|>',
            );
        }
    }

    // Numbers
    if ($this->lexic_permissions['NUMBERS']) {
        // The number groups are prepared by the style cache; build it now
        // if that has not happened yet
        if (!isset($this->language_data['NUMBERS_CACHE'])) {
            $this->build_style_cache();
        }

        // One regexp per number format flag.
        // All of these formats are matched case-insensitively!
        static $numbers_format = array(
            GESHI_NUMBER_INT_BASIC =>
                '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_INT_CSTYLE =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_BIN_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_BIN_PREFIX_0B =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_PREFIX_0O =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_PREFIX_AT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_HEX_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_HEX_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_NONSCI =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_NONSCI_F =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_SCI_SHORT =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_SCI_ZERO =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
            );

        // Each NUMBERS_CACHE entry is either a ready-made regexp (string)
        // or a bitfield of number flags that share one style group
        $this->language_data['NUMBERS_RXCACHE'] = array();
        foreach ($this->language_data['NUMBERS_CACHE'] as $group => $spec) {
            if (is_string($spec)) {
                $regexp = $spec;
            } else {
                // Collect the regexps of all set flags and join them as alternatives
                $formats = array();
                for ($flag = 1; $flag <= $spec; $flag <<= 1) {
                    if ($spec & $flag) {
                        $formats[] = $numbers_format[$flag];
                    }
                }
                $regexp = implode("|", $formats);
            }

            $this->language_data['NUMBERS_RXCACHE'][$group] =
                "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i";
        }

        // Default pre-check: any digit
        if (!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
            $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
        }
    }

    $this->parse_cache_built = true;
}
2148
2149    /**
2150     * Returns the code in $this->source, highlighted and surrounded by the
     * necessary HTML.
2152     *
2153     * This should only be called ONCE, cos it's SLOW! If you want to highlight
2154     * the same source multiple times, you're better off doing a whole lot of
2155     * str_replaces to replace the &lt;span&gt;s
2156     *
2157     * @since 1.0.0
2158     */
2159    public function parse_code() {
2160        // Start the timer
2161        $start_time = microtime();
2162
2163        // Replace all newlines to a common form.
2164        $code = str_replace("\r\n", "\n", $this->source);
2165        $code = str_replace("\r", "\n", $code);
2166
2167        // check whether language_data is available
2168        if (empty($this->language_data)) {
2169            $this->error = GESHI_ERROR_NO_SUCH_LANG;
2170        }
2171
2172        // Firstly, if there is an error, we won't highlight
2173        if ($this->error) {
2174            //Escape the source for output
2175            $result = $this->hsc($this->source);
2176
2177            //This fix is related to SF#1923020, but has to be applied regardless of
2178            //actually highlighting symbols.
2179            $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2180
2181            // Timing is irrelevant
2182            $this->set_time($start_time, $start_time);
2183            $this->finalise($result);
2184            return $result;
2185        }
2186
2187        // make sure the parse cache is up2date
2188        if (!$this->parse_cache_built) {
2189            $this->build_parse_cache();
2190        }
2191
2192        // Initialise various stuff
2193        $length           = strlen($code);
2194        $COMMENT_MATCHED  = false;
2195        $stuff_to_parse   = '';
2196        $endresult        = '';
2197
2198        // "Important" selections are handled like multiline comments
2199        // @todo GET RID OF THIS SHIZ
2200        if ($this->enable_important_blocks) {
2201            $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2202        }
2203
2204        if ($this->strict_mode) {
2205            // Break the source into bits. Each bit will be a portion of the code
2206            // within script delimiters - for example, HTML between < and >
2207            $k = 0;
2208            $parts = array();
2209            $matches = array();
2210            $next_match_pointer = null;
2211            // we use a copy to unset delimiters on demand (when they are not found)
2212            $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2213            $i = 0;
2214            while ($i < $length) {
2215                $next_match_pos = $length + 1; // never true
2216                foreach ($delim_copy as $dk => $delimiters) {
2217                    if(is_array($delimiters)) {
2218                        foreach ($delimiters as $open => $close) {
2219                            // make sure the cache is setup properly
2220                            if (!isset($matches[$dk][$open])) {
2221                                $matches[$dk][$open] = array(
2222                                    'next_match' => -1,
2223                                    'dk' => $dk,
2224
2225                                    'open' => $open, // needed for grouping of adjacent code blocks (see below)
2226                                    'open_strlen' => strlen($open),
2227
2228                                    'close' => $close,
2229                                    'close_strlen' => strlen($close),
2230                                );
2231                            }
2232                            // Get the next little bit for this opening string
2233                            if ($matches[$dk][$open]['next_match'] < $i) {
2234                                // only find the next pos if it was not already cached
2235                                $open_pos = strpos($code, $open, $i);
2236                                if ($open_pos === false) {
2237                                    // no match for this delimiter ever
2238                                    unset($delim_copy[$dk][$open]);
2239                                    continue;
2240                                }
2241                                $matches[$dk][$open]['next_match'] = $open_pos;
2242                            }
2243                            if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2244                                //So we got a new match, update the close_pos
2245                                $matches[$dk][$open]['close_pos'] =
2246                                    strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2247
2248                                $next_match_pointer =& $matches[$dk][$open];
2249                                $next_match_pos = $matches[$dk][$open]['next_match'];
2250                            }
2251                        }
2252                    } else {
2253                        //So we should match an RegExp as Strict Block ...
2254                        /**
2255                         * The value in $delimiters is expected to be an RegExp
2256                         * containing exactly 2 matching groups:
2257                         *  - Group 1 is the opener
2258                         *  - Group 2 is the closer
2259                         */
2260                        if(preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2261                            //We got a match ...
2262                            if(isset($matches_rx['start']) && isset($matches_rx['end']))
2263                            {
2264                                $matches[$dk] = array(
2265                                    'next_match' => $matches_rx['start'][1],
2266                                    'dk' => $dk,
2267
2268                                    'close_strlen' => strlen($matches_rx['end'][0]),
2269                                    'close_pos' => $matches_rx['end'][1],
2270                                    );
2271                            } else {
2272                                $matches[$dk] = array(
2273                                    'next_match' => $matches_rx[1][1],
2274                                    'dk' => $dk,
2275
2276                                    'close_strlen' => strlen($matches_rx[2][0]),
2277                                    'close_pos' => $matches_rx[2][1],
2278                                    );
2279                            }
2280                        } else {
2281                            // no match for this delimiter ever
2282                            unset($delim_copy[$dk]);
2283                            continue;
2284                        }
2285
2286                        if ($matches[$dk]['next_match'] <= $next_match_pos) {
2287                            $next_match_pointer =& $matches[$dk];
2288                            $next_match_pos = $matches[$dk]['next_match'];
2289                        }
2290                    }
2291                }
2292
2293                // non-highlightable text
2294                $parts[$k] = array(
2295                    1 => substr($code, $i, $next_match_pos - $i)
2296                );
2297                ++$k;
2298
2299                if ($next_match_pos > $length) {
2300                    // out of bounds means no next match was found
2301                    break;
2302                }
2303
2304                // highlightable code
2305                $parts[$k][0] = $next_match_pointer['dk'];
2306
2307                //Only combine for non-rx script blocks
2308                if(is_array($delim_copy[$next_match_pointer['dk']])) {
2309                    // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2310                    $i = $next_match_pos + $next_match_pointer['open_strlen'];
2311                    while (true) {
2312                        $close_pos = strpos($code, $next_match_pointer['close'], $i);
2313                        if ($close_pos == false) {
2314                            break;
2315                        }
2316                        $i = $close_pos + $next_match_pointer['close_strlen'];
2317                        if ($i == $length) {
2318                            break;
2319                        }
2320                        if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2321                            substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2322                            // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2323                            foreach ($matches as $submatches) {
2324                                foreach ($submatches as $match) {
2325                                    if ($match['next_match'] == $i) {
2326                                        // a different block already matches here!
2327                                        break 3;
2328                                    }
2329                                }
2330                            }
2331                        } else {
2332                            break;
2333                        }
2334                    }
2335                } else {
2336                    $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2337                    $i = $close_pos;
2338                }
2339
2340                if ($close_pos === false) {
2341                    // no closing delimiter found!
2342                    $parts[$k][1] = substr($code, $next_match_pos);
2343                    ++$k;
2344                    break;
2345                } else {
2346                    $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2347                    ++$k;
2348                }
2349            }
2350            unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2351            $num_parts = $k;
2352
2353            if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2354                // when we have only one part, we don't have anything to highlight at all.
2355                // if we have a "maybe" strict language, this should be handled as highlightable code
2356                $parts = array(
2357                    0 => array(
2358                        0 => '',
2359                        1 => ''
2360                    ),
2361                    1 => array(
2362                        0 => null,
2363                        1 => $parts[0][1]
2364                    )
2365                );
2366                $num_parts = 2;
2367            }
2368
2369        } else {
2370            // Not strict mode - simply dump the source into
2371            // the array at index 1 (the first highlightable block)
2372            $parts = array(
2373                0 => array(
2374                    0 => '',
2375                    1 => ''
2376                ),
2377                1 => array(
2378                    0 => null,
2379                    1 => $code
2380                )
2381            );
2382            $num_parts = 2;
2383        }
2384
2385        //Unset variables we won't need any longer
2386        unset($code);
2387
2388        //Preload some repeatedly used values regarding hardquotes ...
2389        $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2390        $hq_strlen = strlen($hq);
2391
2392        //Preload if line numbers are to be generated afterwards
2393        //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2394        $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2395            !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2396
2397        //preload the escape char for faster checking ...
2398        $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2399
2400        // this is used for single-line comments
2401        $sc_disallowed_before = "";
2402        $sc_disallowed_after = "";
2403
2404        if (isset($this->language_data['PARSER_CONTROL'])) {
2405            if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2406                if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2407                    $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2408                }
2409                if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2410                    $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2411                }
2412            }
2413        }
2414
2415        //Fix for SF#1932083: Multichar Quotemarks unsupported
2416        $is_string_starter = array();
2417        if ($this->lexic_permissions['STRINGS']) {
2418            foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2419                if (!isset($is_string_starter[$quotemark[0]])) {
2420                    $is_string_starter[$quotemark[0]] = (string)$quotemark;
2421                } elseif (is_string($is_string_starter[$quotemark[0]])) {
2422                    $is_string_starter[$quotemark[0]] = array(
2423                        $is_string_starter[$quotemark[0]],
2424                        $quotemark);
2425                } else {
2426                    $is_string_starter[$quotemark[0]][] = $quotemark;
2427                }
2428            }
2429        }
2430
2431        // Now we go through each part. We know that even-indexed parts are
2432        // code that shouldn't be highlighted, and odd-indexed parts should
2433        // be highlighted
2434        for ($key = 0; $key < $num_parts; ++$key) {
2435            $STRICTATTRS = '';
2436
2437            // Even-indexed parts are plain text: emit them escaped and skip highlighting
2438            if (!($key & 1)) {
2439                // Not a block to highlight: pass it through hsc() and move on
2440                $endresult .= $this->hsc($parts[$key][1]);
2441                unset($parts[$key]);
2442                continue;
2443            }
2444
2445            $result = '';
2446            $part = $parts[$key][1];
2447
2448            $highlight_part = true;
2449            if ($this->strict_mode && !is_null($parts[$key][0])) {
2450                // get the class key for this block of code
2451                $script_key = $parts[$key][0];
2452                $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2453                if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2454                    $this->lexic_permissions['SCRIPT']) {
2455                    // Add a span element around the source to
2456                    // highlight the overall source block
2457                    if (!$this->use_classes &&
2458                        $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2459                        $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2460                    } else {
2461                        $attributes = ' class="sc' . $script_key . '"';
2462                    }
2463                    $result .= "<span$attributes>";
2464                    $STRICTATTRS = $attributes;
2465                }
2466            }
2467
2468            if ($highlight_part) {
2469                // Now, highlight the code in this block. This code
2470                // is really the engine of GeSHi (along with the method
2471                // parse_non_string_part).
2472
2473                // cache comment regexps incrementally
2474                $next_comment_regexp_key = '';
2475                $next_comment_regexp_pos = -1;
2476                $next_comment_multi_pos = -1;
2477                $next_comment_single_pos = -1;
2478                $comment_regexp_cache_per_key = array();
2479                $comment_multi_cache_per_key = array();
2480                $comment_single_cache_per_key = array();
2481                $next_open_comment_multi = '';
2482                $next_comment_single_key = '';
2483                $escape_regexp_cache_per_key = array();
2484                $next_escape_regexp_key = '';
2485                $next_escape_regexp_pos = -1;
2486
2487                $length = strlen($part);
2488                for ($i = 0; $i < $length; ++$i) {
2489                    // Get the next char
2490                    $char = $part[$i];
2491                    $char_len = 1;
2492
2493                    // update regexp comment cache if needed
2494                    if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2495                        $next_comment_regexp_pos = $length;
2496                        foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2497                            $match_i = false;
2498                            if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2499                                ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2500                                 $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2501                                // we have already matched something
2502                                if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2503                                    // this comment is never matched
2504                                    continue;
2505                                }
2506                                $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2507                            } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i)) {
2508                                $match_i = $match[0][1];
2509
2510                                $comment_regexp_cache_per_key[$comment_key] = array(
2511                                    'key' => $comment_key,
2512                                    'length' => strlen($match[0][0]),
2513                                    'pos' => $match_i
2514                                );
2515                            } else {
2516                                $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2517                                continue;
2518                            }
2519
2520                            if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2521                                $next_comment_regexp_pos = $match_i;
2522                                $next_comment_regexp_key = $comment_key;
2523                                if ($match_i === $i) {
2524                                    break;
2525                                }
2526                            }
2527                        }
2528                    }
2529
2530                    $string_started = false;
2531
2532                    if (isset($is_string_starter[$char])) {
2533                        // Possibly the start of a new string ...
2534
2535                        //Check which starter it was ...
2536                        //Fix for SF#1932083: Multichar Quotemarks unsupported
2537                        if (is_array($is_string_starter[$char])) {
2538                            $char_new = '';
2539                            foreach ($is_string_starter[$char] as $testchar) {
2540                                if ($testchar === substr($part, $i, strlen($testchar)) &&
2541                                    strlen($testchar) > strlen($char_new)) {
2542                                    $char_new = $testchar;
2543                                    $string_started = true;
2544                                }
2545                            }
2546                            if ($string_started) {
2547                                $char = $char_new;
2548                            }
2549                        } else {
2550                            $testchar = $is_string_starter[$char];
2551                            if ($testchar === substr($part, $i, strlen($testchar))) {
2552                                $char = $testchar;
2553                                $string_started = true;
2554                            }
2555                        }
2556                        $char_len = strlen($char);
2557                    }
2558
2559                    if ($string_started && ($i != $next_comment_regexp_pos)) {
2560                        // Hand out the correct style information for this string
2561                        $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2562                        if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2563                            !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2564                            $string_key = 0;
2565                        }
2566
2567                        // parse the stuff before this
2568                        $result .= $this->parse_non_string_part($stuff_to_parse);
2569                        $stuff_to_parse = '';
2570
2571                        if (!$this->use_classes) {
2572                            $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2573                        } else {
2574                            $string_attributes = ' class="st'.$string_key.'"';
2575                        }
2576
2577                        // now handle the string
2578                        $string = "<span$string_attributes>" . GeSHi::hsc($char);
2579                        $start = $i + $char_len;
2580                        $string_open = true;
2581
2582                        if(empty($this->language_data['ESCAPE_REGEXP'])) {
2583                            $next_escape_regexp_pos = $length;
2584                        }
2585
2586                        do {
2587                            //Get the regular ending pos ...
2588                            $close_pos = strpos($part, $char, $start);
2589                            if(false === $close_pos) {
2590                                $close_pos = $length;
2591                            }
2592
2593                            if($this->lexic_permissions['ESCAPE_CHAR']) {
2594                                // update escape regexp cache if needed
2595                                if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2596                                    $next_escape_regexp_pos = $length;
2597                                    foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2598                                        $match_i = false;
2599                                        if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2600                                            ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2601                                             $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2602                                            // we have already matched something
2603                                            if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2604                                                // this escape regexp is never matched
2605                                                continue;
2606                                            }
2607                                            $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2608                                        } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start)) {
2609                                            $match_i = $match[0][1];
2610
2611                                            $escape_regexp_cache_per_key[$escape_key] = array(
2612                                                'key' => $escape_key,
2613                                                'length' => strlen($match[0][0]),
2614                                                'pos' => $match_i
2615                                            );
2616                                        } else {
2617                                            $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2618                                            continue;
2619                                        }
2620
2621                                        if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2622                                            $next_escape_regexp_pos = $match_i;
2623                                            $next_escape_regexp_key = $escape_key;
2624                                            if ($match_i === $start) {
2625                                                break;
2626                                            }
2627                                        }
2628                                    }
2629                                }
2630
2631                                //Find the next simple escape position
2632                                if('' != $this->language_data['ESCAPE_CHAR']) {
2633                                    $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2634                                    if(false === $simple_escape) {
2635                                        $simple_escape = $length;
2636                                    }
2637                                } else {
2638                                    $simple_escape = $length;
2639                                }
2640                            } else {
2641                                $next_escape_regexp_pos = $length;
2642                                $simple_escape = $length;
2643                            }
2644
2645                            if($simple_escape < $next_escape_regexp_pos &&
2646                                $simple_escape < $length &&
2647                                $simple_escape < $close_pos) {
2648                                //The next escape sequence is a simple one ...
2649                                $es_pos = $simple_escape;
2650
2651                                //Add the stuff not in the string yet ...
2652                                $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2653
2654                                //Get the style for this escaped char ...
2655                                if (!$this->use_classes) {
2656                                    $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2657                                } else {
2658                                    $escape_char_attributes = ' class="es0"';
2659                                }
2660
2661                                //Add the style for the escape char ...
2662                                $string .= "<span$escape_char_attributes>" .
2663                                    GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2664
2665                                //Get the byte AFTER the ESCAPE_CHAR we just found
2666                                $es_char = $part[$es_pos + 1];
2667                                if ($es_char == "\n") {
2668                                    // close the span before the newline so the newline is not wrapped in it
2669                                    $string .= "</span>\n";
2670                                    $start = $es_pos + 2;
2671                                } elseif (ord($es_char) >= 128) {
2672                                    //This is a non-ASCII char (UTF8 or single byte)
2673                                    //This code tries to work around SF#2037598 ...
2674                                    if(function_exists('mb_substr')) {
2675                                        $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2676                                        $string .= $es_char_m . '</span>';
2677                                    } elseif ('utf-8' == $this->encoding) {
2678                                        if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2679                                            "|\xE0[\xA0-\xBF][\x80-\xBF]".
2680                                            "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2681                                            "|\xED[\x80-\x9F][\x80-\xBF]".
2682                                            "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2683                                            "|[\xF1-\xF3][\x80-\xBF]{3}".
2684                                            "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2685                                            $part, $es_char_m, null, $es_pos + 1)) {
2686                                            $es_char_m = $es_char_m[0];
2687                                        } else {
2688                                            $es_char_m = $es_char;
2689                                        }
2690                                        $string .= $this->hsc($es_char_m) . '</span>';
2691                                    } else {
2692                                        $es_char_m = $this->hsc($es_char);
2693                                    }
2694                                    $start = $es_pos + strlen($es_char_m) + 1;
2695                                } else {
2696                                    $string .= $this->hsc($es_char) . '</span>';
2697                                    $start = $es_pos + 2;
2698                                }
2699                            } elseif ($next_escape_regexp_pos < $length &&
2700                                $next_escape_regexp_pos < $close_pos) {
2701                                $es_pos = $next_escape_regexp_pos;
2702                                //Add the stuff not in the string yet ...
2703                                $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2704
2705                                //Get the key and length of this match ...
2706                                $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2707                                $escape_str = substr($part, $es_pos, $escape['length']);
2708                                $escape_key = $escape['key'];
2709
2710                                //Get the style for this escaped char ...
2711                                if (!$this->use_classes) {
2712                                    $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2713                                } else {
2714                                    $escape_char_attributes = ' class="es' . $escape_key . '"';
2715                                }
2716
2717                                //Add the style for the escape char ...
2718                                $string .= "<span$escape_char_attributes>" .
2719                                    $this->hsc($escape_str) . '</span>';
2720
2721                                $start = $es_pos + $escape['length'];
2722                            } else {
2723                                //Copy the remainder of the string ...
2724                                $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2725                                $start = $close_pos + $char_len;
2726                                $string_open = false;
2727                            }
2728                        } while($string_open);
2729
2730                        if ($check_linenumbers) {
2731                            // Are line numbers used? If so, we should end the string before
2732                            // the newline and begin it again (so that when <li>s are put in,
2733                            // the source remains XHTML compliant)
2734                            // note to self: This opens up possibility of config files specifying
2735                            // that languages can/cannot have multiline strings???
2736                            $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2737                        }
2738
2739                        $result .= $string;
2740                        $string = '';
2741                        $i = $start - 1;
2742                        continue;
2743                    } elseif ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2744                        substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2745                        // The start of a hard quoted string
2746                        if (!$this->use_classes) {
2747                            $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2748                            $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2749                        } else {
2750                            $string_attributes = ' class="st_h"';
2751                            $escape_char_attributes = ' class="es_h"';
2752                        }
2753                        // parse the stuff before this
2754                        $result .= $this->parse_non_string_part($stuff_to_parse);
2755                        $stuff_to_parse = '';
2756
2757                        // now handle the string
2758                        $string = '';
2759
2760                        // look for closing quote
2761                        $start = $i + $hq_strlen;
2762                        while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2763                            $start = $close_pos + 1;
2764                            if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
2765                                (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape
2766                                // make sure this quote is not escaped
2767                                foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2768                                    if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2769                                        // check whether this quote is escaped or if it is something like '\\'
2770                                        $escape_char_pos = $close_pos - 1;
2771                                        while ($escape_char_pos > 0
2772                                                && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2773                                            --$escape_char_pos;
2774                                        }
2775                                        if (($close_pos - $escape_char_pos) & 1) {
2776                                            // uneven number of escape chars => this quote is escaped
2777                                            continue 2;
2778                                        }
2779                                    }
2780                                }
2781                            }
2782
2783                            // found closing quote
2784                            break;
2785                        }
2786
2787                        //Found the closing delimiter?
2788                        if (!$close_pos) {
2789                            // span till the end of this $part when no closing delimiter is found
2790                            $close_pos = $length;
2791                        }
2792
2793                        //Get the actual string
2794                        $string = substr($part, $i, $close_pos - $i + 1);
2795                        $i = $close_pos;
2796
2797                        // handle escape chars and encode html chars
2798                        // (special because when we have escape chars within our string they may not be escaped)
2799                        if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2800                            $start = 0;
2801                            $new_string = '';
2802                            while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2803                                // HTML-escape the text preceding the escape char
2804                                $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2805                                // check if this is a hard escape
2806                                foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2807                                    if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2808                                        // indeed, this is a hardescape
2809                                        $new_string .= "<span$escape_char_attributes>" .
2810                                            $this->hsc($hardescape) . '</span>';
2811                                        $start = $es_pos + strlen($hardescape);
2812                                        continue 2;
2813                                    }
2814                                }
2815                                // not a hard escape, but a normal escape
2816                                // they come in pairs of two
2817                                $c = 0;
2818                                while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2819                                    && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2820                                    && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2821                                    $c += 2;
2822                                }
2823                                if ($c) {
2824                                    $new_string .= "<span$escape_char_attributes>" .
2825                                        str_repeat($escaped_escape_char, $c) .
2826                                        '</span>';
2827                                    $start = $es_pos + $c;
2828                                } else {
2829                                    // this is just a single lonely escape char...
2830                                    $new_string .= $escaped_escape_char;
2831                                    $start = $es_pos + 1;
2832                                }
2833                            }
2834                            $string = $new_string . $this->hsc(substr($string, $start));
2835                        } else {
2836                            $string = $this->hsc($string);
2837                        }
2838
2839                        if ($check_linenumbers) {
2840                            // Are line numbers used? If so, we should end the string before
2841                            // the newline and begin it again (so that when <li>s are put in,
2842                            // the source remains XHTML compliant)
2843                            // note to self: This opens up possibility of config files specifying
2844                            // that languages can/cannot have multiline strings???
2845                            $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2846                        }
2847
2848                        $result .= "<span$string_attributes>" . $string . '</span>';
2849                        $string = '';
2850                        continue;
2851                    } else {
2852                        //Have a look for regexp comments
2853                        if ($i == $next_comment_regexp_pos) {
2854                            $COMMENT_MATCHED = true;
2855                            $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2856                            $test_str = $this->hsc(substr($part, $i, $comment['length']));
2857
2858                            //@todo If remove important do remove here
2859                            if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2860                                if (!$this->use_classes) {
2861                                    $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2862                                } else {
2863                                    $attributes = ' class="co' . $comment['key'] . '"';
2864                                }
2865
2866                                $test_str = "<span$attributes>" . $test_str . "</span>";
2867
2868                                // Short-cut through all the multiline code
2869                                if ($check_linenumbers) {
2870                                    // strreplace to put close span and open span around multiline newlines
2871                                    $test_str = str_replace(
2872                                        "\n", "</span>\n<span$attributes>",
2873                                        str_replace("\n ", "\n&nbsp;", $test_str)
2874                                    );
2875                                }
2876                            }
2877
2878                            $i += $comment['length'] - 1;
2879
2880                            // parse the rest
2881                            $result .= $this->parse_non_string_part($stuff_to_parse);
2882                            $stuff_to_parse = '';
2883                        }
2884
2885                        // If we haven't matched a regexp comment, try multi-line comments
2886                        if (!$COMMENT_MATCHED) {
2887                            // Is this a multiline comment?
2888                            if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2889                                $next_comment_multi_pos = $length;
2890                                foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2891                                    $match_i = false;
2892                                    if (isset($comment_multi_cache_per_key[$open]) &&
2893                                        ($comment_multi_cache_per_key[$open] >= $i ||
2894                                         $comment_multi_cache_per_key[$open] === false)) {
2895                                        // we have already matched something
2896                                        if ($comment_multi_cache_per_key[$open] === false) {
2897                                            // this comment is never matched
2898                                            continue;
2899                                        }
2900                                        $match_i = $comment_multi_cache_per_key[$open];
2901                                    } elseif (($match_i = stripos($part, $open, $i)) !== false) {
2902                                        $comment_multi_cache_per_key[$open] = $match_i;
2903                                    } else {
2904                                        $comment_multi_cache_per_key[$open] = false;
2905                                        continue;
2906                                    }
2907                                    if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2908                                        $next_comment_multi_pos = $match_i;
2909                                        $next_open_comment_multi = $open;
2910                                        if ($match_i === $i) {
2911                                            break;
2912                                        }
2913                                    }
2914                                }
2915                            }
2916                            if ($i == $next_comment_multi_pos) {
2917                                $open = $next_open_comment_multi;
2918                                $close = $this->language_data['COMMENT_MULTI'][$open];
2919                                $open_strlen = strlen($open);
2920                                $close_strlen = strlen($close);
2921                                $COMMENT_MATCHED = true;
2922                                $test_str_match = $open;
2923                                //@todo If remove important do remove here
2924                                if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2925                                    $open == GESHI_START_IMPORTANT) {
2926                                    if ($open != GESHI_START_IMPORTANT) {
2927                                        if (!$this->use_classes) {
2928                                            $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2929                                        } else {
2930                                            $attributes = ' class="coMULTI"';
2931                                        }
2932                                        $test_str = "<span$attributes>" . $this->hsc($open);
2933                                    } else {
2934                                        if (!$this->use_classes) {
2935                                            $attributes = ' style="' . $this->important_styles . '"';
2936                                        } else {
2937                                            $attributes = ' class="imp"';
2938                                        }
2939
2940                                        // We don't include the start of the comment if it's an
2941                                        // "important" part
2942                                        $test_str = "<span$attributes>";
2943                                    }
2944                                } else {
2945                                    $test_str = $this->hsc($open);
2946                                }
2947
2948                                $close_pos = strpos( $part, $close, $i + $open_strlen );
2949
2950                                if ($close_pos === false) {
2951                                    $close_pos = $length;
2952                                }
2953
2954                                // Short-cut through all the multiline code
2955                                $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2956                                if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
2957                                    $test_str_match == GESHI_START_IMPORTANT) &&
2958                                    $check_linenumbers) {
2959
2960                                    // strreplace to put close span and open span around multiline newlines
2961                                    $test_str .= str_replace(
2962                                        "\n", "</span>\n<span$attributes>",
2963                                        str_replace("\n ", "\n&nbsp;", $rest_of_comment)
2964                                    );
2965                                } else {
2966                                    $test_str .= $rest_of_comment;
2967                                }
2968
2969                                if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2970                                    $test_str_match == GESHI_START_IMPORTANT) {
2971                                    $test_str .= '</span>';
2972                                }
2973
2974                                $i = $close_pos + $close_strlen - 1;
2975
2976                                // parse the rest
2977                                $result .= $this->parse_non_string_part($stuff_to_parse);
2978                                $stuff_to_parse = '';
2979                            }
2980                        }
2981
2982                        // If we haven't matched a multiline comment, try single-line comments
2983                        if (!$COMMENT_MATCHED) {
2984                            // cache potential single line comment occurances
2985                            if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2986                                $next_comment_single_pos = $length;
2987                                foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2988                                    $match_i = false;
2989                                    if (isset($comment_single_cache_per_key[$comment_key]) &&
2990                                        ($comment_single_cache_per_key[$comment_key] >= $i ||
2991                                         $comment_single_cache_per_key[$comment_key] === false)) {
2992                                        // we have already matched something
2993                                        if ($comment_single_cache_per_key[$comment_key] === false) {
2994                                            // this comment is never matched
2995                                            continue;
2996                                        }
2997                                        $match_i = $comment_single_cache_per_key[$comment_key];
2998                                    } elseif (
2999                                        // case sensitive comments
3000                                        ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3001                                        ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
3002                                        // non case sensitive
3003                                        (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3004                                          (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
3005                                        $comment_single_cache_per_key[$comment_key] = $match_i;
3006                                    } else {
3007                                        $comment_single_cache_per_key[$comment_key] = false;
3008                                        continue;
3009                                    }
3010                                    if ($match_i !== false && $match_i < $next_comment_single_pos) {
3011                                        $next_comment_single_pos = $match_i;
3012                                        $next_comment_single_key = $comment_key;
3013                                        if ($match_i === $i) {
3014                                            break;
3015                                        }
3016                                    }
3017                                }
3018                            }
3019                            if ($next_comment_single_pos == $i) {
3020                                $comment_key = $next_comment_single_key;
3021                                $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
3022                                $com_len = strlen($comment_mark);
3023
3024                                // This check will find special variables like $# in bash
3025                                // or compiler directives of Delphi beginning {$
3026                                if ((empty($sc_disallowed_before) || ($i == 0) ||
3027                                    (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
3028                                    (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
3029                                    (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
3030                                {
3031                                    // this is a valid comment
3032                                    $COMMENT_MATCHED = true;
3033                                    if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3034                                        if (!$this->use_classes) {
3035                                            $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
3036                                        } else {
3037                                            $attributes = ' class="co' . $comment_key . '"';
3038                                        }
3039                                        $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
3040                                    } else {
3041                                        $test_str = $this->hsc($comment_mark);
3042                                    }
3043
3044                                    //Check if this comment is the last in the source
3045                                    $close_pos = strpos($part, "\n", $i);
3046                                    $oops = false;
3047                                    if ($close_pos === false) {
3048                                        $close_pos = $length;
3049                                        $oops = true;
3050                                    }
3051                                    $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
3052                                    if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3053                                        $test_str .= "</span>";
3054                                    }
3055
3056                                    // Take into account that the comment might be the last in the source
3057                                    if (!$oops) {
3058                                      $test_str .= "\n";
3059                                    }
3060
3061                                    $i = $close_pos;
3062
3063                                    // parse the rest
3064                                    $result .= $this->parse_non_string_part($stuff_to_parse);
3065                                    $stuff_to_parse = '';
3066                                }
3067                            }
3068                        }
3069                    }
3070
3071                    // Where are we adding this char?
3072                    if (!$COMMENT_MATCHED) {
3073                        $stuff_to_parse .= $char;
3074                    } else {
3075                        $result .= $test_str;
3076                        unset($test_str);
3077                        $COMMENT_MATCHED = false;
3078                    }
3079                }
3080                // Parse the last bit
3081                $result .= $this->parse_non_string_part($stuff_to_parse);
3082                $stuff_to_parse = '';
3083            } else {
3084                $result .= $this->hsc($part);
3085            }
3086            // Close the <span> that surrounds the block
3087            if ($STRICTATTRS != '') {
3088                $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
3089                $result .= '</span>';
3090            }
3091
3092            $endresult .= $result;
3093            unset($part, $parts[$key], $result);
3094        }
3095
3096        //This fix is related to SF#1923020, but has to be applied regardless of
3097        //actually highlighting symbols.
3098        /** NOTE: memorypeak #3 */
3099        $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
3100
3101//        // Parse the last stuff (redundant?)
3102//        $result .= $this->parse_non_string_part($stuff_to_parse);
3103
3104        // Lop off the very first and last spaces
3105//        $result = substr($result, 1, -1);
3106
3107        // We're finished: stop timing
3108        $this->set_time($start_time, microtime());
3109
3110        $this->finalise($endresult);
3111        return $endresult;
3112    }
3113
3114    /**
3115     * Swaps out spaces and tabs for HTML indentation. Not needed if
3116     * the code is in a pre block...
3117     *
3118     * @param  string $result The source to indent (reference!)
3119     * @since  1.0.0
3120     */
    protected function indent(&$result) {
        // Works on $result in place (it is taken by reference; nothing is returned):
        //  1. lines containing tabs are rebuilt with every tab expanded up to the
        //     next tab stop, ignoring HTML tags and counting entities as one column;
        //  2. a space at the start of any line and the second space of every
        //     two-space pair are turned into &nbsp;;
        //  3. when line numbers are off and the header is not GESHI_HEADER_PRE_TABLE,
        //     newlines are converted to <br> (nl2br) or to $this->line_ending.

        /// Replace tabs with the correct number of spaces
        if (false !== strpos($result, "\t")) {
            $lines = explode("\n", $result);
            $result = null;//Save memory while we process the lines individually
            $tab_width = $this->get_real_tab_width();
            // Template for tab padding: one '&nbsp;' (6 characters) followed by
            // $tab_width plain spaces. Slices of it are taken below.
            $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);

            for ($key = 0, $n = count($lines); $key < $n; $key++) {
                $line = $lines[$key];
                // Lines without any tab are left exactly as they are
                if (false === strpos($line, "\t")) {
                    continue;
                }

                // $pos is the visible column: it is not advanced for characters
                // inside HTML tags, and an entity advances it by one in total.
                $pos = 0;
                $length = strlen($line);
                $lines[$key] = ''; // reduce memory

                $IN_TAG = false;
                for ($i = 0; $i < $length; ++$i) {
                    $char = $line[$i];
                    // Simple engine to work out whether we're in a tag.
                    // If we are we do not modify $pos. This is so we ignore HTML
                    // in the line and only work out the tab replacement
                    // via the actual content of the string
                    // This test could be improved to include strings in the
                    // html so that < or > would be allowed in user's styles
                    // (e.g. quotes: '<' '>'; or similar)
                    if ($IN_TAG) {
                        if ('>' == $char) {
                            $IN_TAG = false;
                        }
                        $lines[$key] .= $char;
                    } elseif ('<' == $char) {
                        $IN_TAG = true;
                        $lines[$key] .= '<';
                    } elseif ('&' == $char) {
                        // Look for the ';' that would terminate an entity, within
                        // the 5 characters starting 3 positions after the '&'.
                        $substr = substr($line, $i + 3, 5);
                        $posi = strpos($substr, ';');
                        if (false === $posi) {
                            // No terminator found: count the '&' as one column
                            ++$pos;
                        } else {
                            // The characters after the '&' up to and including
                            // the ';' will each be counted by the default branch
                            // below ($posi + 3 of them); subtracting $posi + 2
                            // here makes the whole entity advance $pos by one.
                            $pos -= $posi+2;
                        }
                        $lines[$key] .= $char;
                    } elseif ("\t" == $char) {
                        $str = '';
                        // OPTIMISE - move $strs out. Make an array:
                        // $tabs = array(
                        //  1 => '&nbsp;',
                        //  2 => '&nbsp; ',
                        //  3 => '&nbsp; &nbsp;' etc etc
                        // to use instead of building a string every time
                        $tab_end_width = $tab_width - ($pos % $tab_width); // Number of columns up to the next tab stop
                        if (($pos & 1) || 1 == $tab_end_width) {
                            // Odd column, or only one column to fill: skip the
                            // leading '&nbsp;' (offset 6) and emit plain spaces only
                            $str .= substr($tab_string, 6, $tab_end_width);
                        } else {
                            // Otherwise emit '&nbsp;' followed by
                            // ($tab_end_width - 1) plain spaces
                            $str .= substr($tab_string, 0, $tab_end_width+5);
                        }
                        $lines[$key] .= $str;
                        $pos += $tab_end_width;

                        // No further tabs on this line: copy the remainder
                        // verbatim and stop scanning it
                        if (false === strpos($line, "\t", $i + 1)) {
                            $lines[$key] .= substr($line, $i + 1);
                            break;
                        }
                    } elseif (0 == $pos && ' ' == $char) {
                        // A space in visible column 0 becomes a non-breaking space
                        $lines[$key] .= '&nbsp;';
                        ++$pos;
                    } else {
                        $lines[$key] .= $char;
                        ++$pos;
                    }
                }
            }
            $result = implode("\n", $lines);
            unset($lines);//We don't need the lines separated beyond this --- free them!
        }
        // Other whitespace
        // BenBE: Fix to reduce the number of replacements to be done
        // A space at the start of any line becomes &nbsp;, then every pair of
        // consecutive spaces becomes ' &nbsp;'
        $result = preg_replace('/^ /m', '&nbsp;', $result);
        $result = str_replace('  ', ' &nbsp;', $result);

        // Newlines are only converted here when line numbers are disabled and
        // the header type is not GESHI_HEADER_PRE_TABLE
        if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
            if ($this->line_ending === null) {
                $result = nl2br($result);
            } else {
                $result = str_replace("\n", $this->line_ending, $result);
            }
        }
    }
3212
3213    /**
3214     * Changes the case of a keyword for those languages where a change is asked for
3215     *
3216     * @param  string $instr The keyword to change the case of
3217     * @return string The keyword with its case changed
3218     * @since  1.0.0
3219     */
3220    protected function change_case($instr) {
3221        switch ($this->language_data['CASE_KEYWORDS']) {
3222            case GESHI_CAPS_UPPER:
3223                return strtoupper($instr);
3224            case GESHI_CAPS_LOWER:
3225                return strtolower($instr);
3226            default:
3227                return $instr;
3228        }
3229    }
3230
3231    /**
3232     * Handles replacements of keywords to include markup and links if requested
3233     *
3234     * @param  string $match The keyword to add the Markup to
3235     * @return string The HTML for the match found
3236     * @since  1.0.8
3237     *
3238     * @todo   Get rid of ender in keyword links
3239     */
3240    protected function handle_keyword_replace($match) {
3241        $k = $this->_kw_replace_group;
3242        $keyword = $match[0];
3243        $keyword_match = $match[1];
3244
3245        $before = '';
3246        $after = '';
3247
3248        if ($this->keyword_links) {
3249            // Keyword links have been ebabled
3250
3251            if (isset($this->language_data['URLS'][$k]) &&
3252                $this->language_data['URLS'][$k] != '') {
3253                // There is a base group for this keyword
3254
3255                // Old system: strtolower
3256                //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
3257                // New system: get keyword from language file to get correct case
3258                if (!$this->language_data['CASE_SENSITIVE'][$k] &&
3259                    strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
3260                    foreach ($this->language_data['KEYWORDS'][$k] as $word) {
3261                        if (strcasecmp($word, $keyword_match) == 0) {
3262                            break;
3263                        }
3264                    }
3265                } else {
3266                    $word = $keyword_match;
3267                }
3268
3269                $before = '<|UR1|"' .
3270                    str_replace(
3271                        array(
3272                            '{FNAME}',
3273                            '{FNAMEL}',
3274                            '{FNAMEU}',
3275                            '{FNAMEUF}',
3276                            '.'),
3277                        array(
3278                            str_replace('+', '%20', urlencode($this->hsc($word))),
3279                            str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
3280                            str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
3281                            str_replace('+', '%20', urlencode($this->hsc(ucfirst($word)))),
3282                            '<DOT>'),
3283                        $this->language_data['URLS'][$k]
3284                    ) . '">';
3285                $after = '</a>';
3286            }
3287        }
3288
3289        return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
3290    }
3291
3292    /**
3293     * handles regular expressions highlighting-definitions with callback functions
3294     *
3295     * @note this is a callback, don't use it directly
3296     *
3297     * @param array $matches the matches array
3298     * @return string The highlighted string
3299     * @since 1.0.8
3300     */
3301    protected function handle_regexps_callback($matches) {
3302        // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
3303        return  ' style="' . call_user_func($this->language_data['STYLES']['REGEXPS'][$this->_rx_key], $matches[1]) . '"'. $matches[1] . '|>';
3304    }
3305
3306    /**
3307     * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
3308     *
3309     * @note this is a callback, don't use it directly
3310     *
3311     * @param array $matches the matches array
3312     * @return string
3313     * @since 1.0.8
3314     */
3315    protected function handle_multiline_regexps($matches) {
3316        $before = $this->_hmr_before;
3317        $after = $this->_hmr_after;
3318        if ($this->_hmr_replace) {
3319            $replace = $this->_hmr_replace;
3320            $search = array();
3321
3322            foreach (array_keys($matches) as $k) {
3323                $search[] = '\\' . $k;
3324            }
3325
3326            $before = str_replace($search, $matches, $before);
3327            $after = str_replace($search, $matches, $after);
3328            $replace = str_replace($search, $matches, $replace);
3329        } else {
3330            $replace = $matches[0];
3331        }
3332        return $before
3333                    . '<|!REG3XP' . $this->_hmr_key .'!>'
3334                        . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
3335                    . '|>'
3336              . $after;
3337    }
3338
3339    /**
3340     * Takes a string that has no strings or comments in it, and highlights
3341     * stuff like keywords, numbers and methods.
3342     *
3343     * @param string $stuff_to_parse The string to parse for keyword, numbers etc.
3344     * @since 1.0.0
3345     * @todo BUGGY! Why? Why not build string and return?
3346     * @return string
3347     */
3348    protected function parse_non_string_part($stuff_to_parse) {
3349        $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
3350
3351        // Highlight keywords
3352        $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#|^&";
3353        $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
3354        if ($this->lexic_permissions['STRINGS']) {
3355            $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
3356            $disallowed_before .= $quotemarks;
3357            $disallowed_after .= $quotemarks;
3358        }
3359        $disallowed_before .= "])";
3360        $disallowed_after .= "])";
3361
3362        $parser_control_pergroup = false;
3363        if (isset($this->language_data['PARSER_CONTROL'])) {
3364            if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
3365                $x = 0; // check wether per-keyword-group parser_control is enabled
3366                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
3367                    $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
3368                    ++$x;
3369                }
3370                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
3371                    $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
3372                    ++$x;
3373                }
3374                $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
3375            }
3376        }
3377
3378        foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3379            if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
3380                $this->lexic_permissions['KEYWORDS'][$k]) {
3381
3382                $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
3383                $modifiers = $case_sensitive ? '' : 'i';
3384
3385                // NEW in 1.0.8 - per-keyword-group parser control
3386                $disallowed_before_local = $disallowed_before;
3387                $disallowed_after_local = $disallowed_after;
3388                if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
3389                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
3390                        $disallowed_before_local =
3391                            $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
3392                    }
3393
3394                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
3395                        $disallowed_after_local =
3396                            $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
3397                    }
3398                }
3399
3400                $this->_kw_replace_group = $k;
3401
3402                //NEW in 1.0.8, the cached regexp list
3403                // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
3404                for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set <  $set_length; ++$set) {
3405                    $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
3406                    // Might make a more unique string for putting the number in soon
3407                    // Basically, we don't put the styles in yet because then the styles themselves will
3408                    // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
3409                    $stuff_to_parse = preg_replace_callback(
3410                        "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php|aspx?))$disallowed_after_local/$modifiers",
3411                        array($this, 'handle_keyword_replace'),
3412                        $stuff_to_parse
3413                        );
3414                }
3415            }
3416        }
3417
3418        // Regular expressions
3419        foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
3420            if ($this->lexic_permissions['REGEXPS'][$key]) {
3421                if (is_array($regexp)) {
3422                    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3423                        // produce valid HTML when we match multiple lines
3424                        $this->_hmr_replace = $regexp[GESHI_REPLACE];
3425                        $this->_hmr_before = $regexp[GESHI_BEFORE];
3426                        $this->_hmr_key = $key;
3427                        $this->_hmr_after = $regexp[GESHI_AFTER];
3428                        $stuff_to_parse = preg_replace_callback(
3429                            "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
3430                            array($this, 'handle_multiline_regexps'),
3431                            $stuff_to_parse);
3432                        $this->_hmr_replace = false;
3433                        $this->_hmr_before = '';
3434                        $this->_hmr_after = '';
3435                    } else {
3436                        $stuff_to_parse = preg_replace(
3437                            '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
3438                            $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
3439                            $stuff_to_parse);
3440                    }
3441                } else {
3442                    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3443                        // produce valid HTML when we match multiple lines
3444                        $this->_hmr_key = $key;
3445                        $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
3446                                              array($this, 'handle_multiline_regexps'), $stuff_to_parse);
3447                        $this->_hmr_key = '';
3448                    } else {
3449                        $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
3450                    }
3451                }
3452            }
3453        }
3454
3455        // Highlight numbers. As of 1.0.8 we support different types of numbers
3456        $numbers_found = false;
3457
3458        if ($this->lexic_permissions['NUMBERS'] && preg_match($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'], $stuff_to_parse )) {
3459            $numbers_found = true;
3460
3461            //For each of the formats ...
3462            foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3463                //Check if it should be highlighted ...
3464                $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
3465            }
3466        }
3467
3468        //
3469        // Now that's all done, replace /[number]/ with the correct styles
3470        //
3471        foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3472            if (!$this->use_classes) {
3473                $attributes = ' style="' .
3474                    (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
3475                    $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
3476            } else {
3477                $attributes = ' class="kw' . $k . '"';
3478            }
3479            $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
3480        }
3481
3482        if ($numbers_found) {
3483            // Put number styles in
3484            foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3485                //Commented out for now, as this needs some review ...
3486                //                if ($numbers_permissions & $id) {
3487                //Get the appropriate style ...
3488                //Checking for unset styles is done by the style cache builder ...
3489                if (!$this->use_classes) {
3490                    $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
3491                } else {
3492                    $attributes = ' class="nu'.$id.'"';
3493                }
3494
3495                //Set in the correct styles ...
3496                $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
3497                //                }
3498            }
3499        }
3500
3501        // Highlight methods and fields in objects
3502        if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
3503            $oolang_spaces = "[\s]*";
3504            $oolang_before = "";
3505            $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
3506            if (isset($this->language_data['PARSER_CONTROL'])) {
3507                if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
3508                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
3509                        $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
3510                    }
3511                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
3512                        $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
3513                    }
3514                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
3515                        $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
3516                    }
3517                }
3518            }
3519
3520            foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
3521                if (false !== strpos($stuff_to_parse, $splitter)) {
3522                    if (!$this->use_classes) {
3523                        $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
3524                    } else {
3525                        $attributes = ' class="me' . $key . '"';
3526                    }
3527                    $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
3528                }
3529            }
3530        }
3531
3532        //
3533        // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
3534        // You try it, and see what happens ;)
3535        // TODO: Fix lexic permissions not converting entities if shouldn't
3536        // be highlighting regardless
3537        //
3538        if ($this->lexic_permissions['BRACKETS']) {
3539            $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
3540                              $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
3541        }
3542
3543
3544        //FIX for symbol highlighting ...
3545        if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
            //Get all matches and throw away those within a block that is already highlighted... (i.e. matched by a regexp)
3547            $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
3548            $global_offset = 0;
3549            for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
3550                $symbol_match = $pot_symbols[$s_id][0][0];
3551                if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
3552                    // already highlighted blocks _must_ include either < or >
3553                    // so if this conditional applies, we have to skip this match
3554                    // BenBE: UNLESS the block contains <SEMI> or <PIPE>
3555                    if(strpos($symbol_match, '<SEMI>') === false &&
3556                        strpos($symbol_match, '<PIPE>') === false) {
3557                        continue;
3558                    }
3559                }
3560
3561                // if we reach this point, we have a valid match which needs to be highlighted
3562
3563                $symbol_length = strlen($symbol_match);
3564                $symbol_offset = $pot_symbols[$s_id][0][1];
3565                unset($pot_symbols[$s_id]);
3566                $symbol_hl = "";
3567
3568                // if we have multiple styles, we have to handle them properly
3569                if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
3570                    $old_sym = -1;
3571                    // Split the current stuff to replace into its atomic symbols ...
3572                    preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
3573                    foreach ($sym_match_syms[0] as $sym_ms) {
                        //Check if consecutive symbols belong to the same group to save output ...
3575                        if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
3576                            && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
3577                            if (-1 != $old_sym) {
3578                                $symbol_hl .= "|>";
3579                            }
3580                            $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
3581                            if (!$this->use_classes) {
3582                                $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
3583                            } else {
3584                                $symbol_hl .= '<| class="sy' . $old_sym . '">';
3585                            }
3586                        }
3587                        $symbol_hl .= $sym_ms;
3588                    }
3589                    unset($sym_match_syms);
3590
3591                    //Close remaining tags and insert the replacement at the right position ...
3592                    //Take caution if symbol_hl is empty to avoid doubled closing spans.
3593                    if (-1 != $old_sym) {
3594                        $symbol_hl .= "|>";
3595                    }
3596                } else {
3597                    if (!$this->use_classes) {
3598                        $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
3599                    } else {
3600                        $symbol_hl = '<| class="sy0">';
3601                    }
3602                    $symbol_hl .= $symbol_match . '|>';
3603                }
3604
3605                $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
3606
3607                // since we replace old text with something of different size,
3608                // we'll have to keep track of the differences
3609                $global_offset += strlen($symbol_hl) - $symbol_length;
3610            }
3611        }
3612        //FIX for symbol highlighting ...
3613
3614        // Add class/style for regexps
3615        foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3616            if ($this->lexic_permissions['REGEXPS'][$key]) {
3617                if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
3618                    $this->_rx_key = $key;
3619                    $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
3620                        array($this, 'handle_regexps_callback'),
3621                        $stuff_to_parse);
3622                } else {
3623                    if (!$this->use_classes) {
3624                        $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
3625                    } else {
3626                        if (is_array($this->language_data['REGEXPS'][$key]) &&
3627                            array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
3628                            $attributes = ' class="' .
3629                                $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
3630                        } else {
3631                           $attributes = ' class="re' . $key . '"';
3632                        }
3633                    }
3634                    $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
3635                }
3636            }
3637        }
3638
3639        // Replace <DOT> with . for urls
3640        $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
3641        // Replace <|UR1| with <a href= for urls also
3642        if (isset($this->link_styles[GESHI_LINK])) {
3643            if ($this->use_classes) {
3644                $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3645            } else {
3646                $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
3647            }
3648        } else {
3649            $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3650        }
3651
3652        //
3653        // NOW we add the span thingy ;)
3654        //
3655
3656        $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
3657        $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
3658        return substr($stuff_to_parse, 1);
3659    }
3660
3661    /**
3662     * Sets the time taken to parse the code
3663     *
3664     * @param string $start_time The time when parsing started as returned by @see microtime()
3665     * @param string $end_time   The time when parsing ended as returned by @see microtime()
3666     * @since 1.0.2
3667     */
3668    protected function set_time($start_time, $end_time) {
3669        $start = explode(' ', $start_time);
3670        $end = explode(' ', $end_time);
3671        $this->time = $end[0] + $end[1] - $start[0] - $start[1];
3672    }
3673
3674    /**
3675     * Gets the time taken to parse the code
3676     *
3677     * @return double The time taken to parse the code
3678     * @since  1.0.2
3679     */
3680    public function get_time() {
3681        return $this->time;
3682    }
3683
3684    /**
3685     * Merges arrays recursively, overwriting values of the first array with values of later arrays
3686     *
3687     * @since 1.0.8
3688     */
3689    protected function merge_arrays() {
3690        $arrays = func_get_args();
3691        $narrays = count($arrays);
3692
3693        // check arguments
3694        // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array)
3695        for ($i = 0; $i < $narrays; $i ++) {
3696            if (!is_array($arrays[$i])) {
3697                // also array_merge_recursive returns nothing in this case
3698                trigger_error('Argument #' . ($i+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
3699                return false;
3700            }
3701        }
3702
3703        // the first array is in the output set in every case
3704        $ret = $arrays[0];
3705
3706        // merege $ret with the remaining arrays
3707        for ($i = 1; $i < $narrays; $i ++) {
3708            foreach ($arrays[$i] as $key => $value) {
3709                if (is_array($value) && isset($ret[$key])) {
3710                    // if $ret[$key] is not an array you try to merge an scalar value with an array - the result is not defined (incompatible arrays)
3711                    // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false.
3712                    $ret[$key] = $this->merge_arrays($ret[$key], $value);
3713                } else {
3714                    $ret[$key] = $value;
3715                }
3716            }
3717        }
3718
3719        return $ret;
3720    }
3721
3722    /**
3723     * Gets language information and stores it for later use
3724     *
3725     * @param string $file_name The filename of the language file you want to load
3726     * @since 1.0.0
3727     * @todo Needs to load keys for lexic permissions for keywords, regexps etc
3728     */
3729    protected function load_language($file_name) {
3730        if ($file_name == $this->loaded_language) {
3731            // this file is already loaded!
3732            return;
3733        }
3734
3735        //Prepare some stuff before actually loading the language file
3736        $this->loaded_language = $file_name;
3737        $this->parse_cache_built = false;
3738        $this->enable_highlighting();
3739        $language_data = array();
3740
3741        //Load the language file
3742        require $file_name;
3743
3744        // Perhaps some checking might be added here later to check that
3745        // $language data is a valid thing but maybe not
3746        $this->language_data = $language_data;
3747
3748        // Set strict mode if should be set
3749        $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];
3750
3751        // Set permissions for all lexics to true
3752        // so they'll be highlighted by default
3753        foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
3754            if (!empty($this->language_data['KEYWORDS'][$key])) {
3755                $this->lexic_permissions['KEYWORDS'][$key] = true;
3756            } else {
3757                $this->lexic_permissions['KEYWORDS'][$key] = false;
3758            }
3759        }
3760
3761        foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
3762            $this->lexic_permissions['COMMENTS'][$key] = true;
3763        }
3764        foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3765            $this->lexic_permissions['REGEXPS'][$key] = true;
3766        }
3767
3768        // for BenBE and future code reviews:
3769        // we can use empty here since we only check for existance and emptiness of an array
3770        // if it is not an array at all but rather false or null this will work as intended as well
3771        // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
3772        if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
3773            foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
3774                // it's either true or false and maybe is true as well
3775                $perm = $value !== GESHI_NEVER;
3776                if ($flag == 'ALL') {
3777                    $this->enable_highlighting($perm);
3778                    continue;
3779                }
3780                if (!isset($this->lexic_permissions[$flag])) {
3781                    // unknown lexic permission
3782                    continue;
3783                }
3784                if (is_array($this->lexic_permissions[$flag])) {
3785                    foreach ($this->lexic_permissions[$flag] as $key => $val) {
3786                        $this->lexic_permissions[$flag][$key] = $perm;
3787                    }
3788                } else {
3789                    $this->lexic_permissions[$flag] = $perm;
3790                }
3791            }
3792            unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
3793        }
3794
3795        //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
3796        //You need to set one for HARDESCAPES only in this case.
3797        if(!isset($this->language_data['HARDCHAR'])) {
3798            $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
3799        }
3800
3801        //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
3802        $style_filename = substr($file_name, 0, -4) . '.style.php';
3803        if (is_readable($style_filename)) {
3804            //Clear any style_data that could have been set before ...
3805            if (isset($style_data)) {
3806                unset($style_data);
3807            }
3808
3809            //Read the Style Information from the style file
3810            include $style_filename;
3811
3812            //Apply the new styles to our current language styles
3813            if (isset($style_data) && is_array($style_data)) {
3814                $this->language_data['STYLES'] =
3815                    $this->merge_arrays($this->language_data['STYLES'], $style_data);
3816            }
3817        }
3818    }
3819
3820    /**
3821     * Takes the parsed code and various options, and creates the HTML
3822     * surrounding it to make it look nice.
3823     *
3824     * @param  string $parsed_code The code already parsed (reference!)
3825     * @since  1.0.0
3826     */
3827    protected function finalise(&$parsed_code) {
3828        // Remove end parts of important declarations
3829        // This is BUGGY!! My fault for bad code: fix coming in 1.2
3830        // @todo Remove this crap
3831        if ($this->enable_important_blocks &&
3832            (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
3833            $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
3834        }
3835
3836        // Add HTML whitespace stuff if we're using the <div> header
3837        if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
3838            $this->indent($parsed_code);
3839        }
3840
3841        // purge some unnecessary stuff
3842        /** NOTE: memorypeak #1 */
3843        $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);
3844
3845        // If we are using IDs for line numbers, there needs to be an overall
3846        // ID set to prevent collisions.
3847        if ($this->add_ids && !$this->overall_id) {
3848            $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
3849        }
3850
3851        // Get code into lines
3852        /** NOTE: memorypeak #2 */
3853        $code = explode("\n", $parsed_code);
3854        $parsed_code = $this->header();
3855
3856        // If we're using line numbers, we insert <li>s and appropriate
3857        // markup to style them (otherwise we don't need to do anything)
3858        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
3859            // If we're using the <pre> header, we shouldn't add newlines because
3860            // the <pre> will line-break them (and the <li>s already do this for us)
3861            $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';
3862
3863            // Foreach line...
3864            for ($i = 0, $n = count($code); $i < $n;) {
3865                //Reset the attributes for a new line ...
3866                $attrs = array();
3867
3868                // Make lines have at least one space in them if they're empty
3869                // BenBE: Checking emptiness using trim instead of relying on blanks
3870                if ('' == trim($code[$i])) {
3871                    $code[$i] = '&nbsp;';
3872                }
3873
3874                // If this is a "special line"...
3875                if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3876                    $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3877                    // Set the attributes to style the line
3878                    if ($this->use_classes) {
3879                        //$attr = ' class="li2"';
3880                        $attrs['class'][] = 'li2';
3881                        $def_attr = ' class="de2"';
3882                    } else {
3883                        //$attr = ' style="' . $this->line_style2 . '"';
3884                        $attrs['style'][] = $this->line_style2;
3885                        // This style "covers up" the special styles set for special lines
3886                        // so that styles applied to special lines don't apply to the actual
3887                        // code on that line
3888                        $def_attr = ' style="' . $this->code_style . '"';
3889                    }
3890                } else {
3891                    if ($this->use_classes) {
3892                        //$attr = ' class="li1"';
3893                        $attrs['class'][] = 'li1';
3894                        $def_attr = ' class="de1"';
3895                    } else {
3896                        //$attr = ' style="' . $this->line_style1 . '"';
3897                        $attrs['style'][] = $this->line_style1;
3898                        $def_attr = ' style="' . $this->code_style . '"';
3899                    }
3900                }
3901
3902                //Check which type of tag to insert for this line
3903                if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3904                    $start = "<pre$def_attr>";
3905                    $end = '</pre>';
3906                } else {
3907                    // Span or div?
3908                    $start = "<div$def_attr>";
3909                    $end = '</div>';
3910                }
3911
3912                ++$i;
3913
3914                // Are we supposed to use ids? If so, add them
3915                if ($this->add_ids) {
3916                    $attrs['id'][] = "$this->overall_id-$i";
3917                }
3918
3919                //Is this some line with extra styles???
3920                if (in_array($i, $this->highlight_extra_lines)) {
3921                    if ($this->use_classes) {
3922                        if (isset($this->highlight_extra_lines_styles[$i])) {
3923                            $attrs['class'][] = "lx$i";
3924                        } else {
3925                            $attrs['class'][] = "ln-xtra";
3926                        }
3927                    } else {
3928                        array_push($attrs['style'], $this->get_line_style($i));
3929                    }
3930                }
3931
3932                // Add in the line surrounded by appropriate list HTML
3933                $attr_string = '';
3934                foreach ($attrs as $key => $attr) {
3935                    $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
3936                }
3937
3938                $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
3939                unset($code[$i - 1]);
3940            }
3941        } else {
3942            $n = count($code);
3943            if ($this->use_classes) {
3944                $attributes = ' class="de1"';
3945            } else {
3946                $attributes = ' style="'. $this->code_style .'"';
3947            }
3948            if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3949                $parsed_code .= '<pre'. $attributes .'>';
3950            } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
3951                if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3952                    if ($this->use_classes) {
3953                        $attrs = ' class="ln"';
3954                    } else {
3955                        $attrs = ' style="'. $this->table_linenumber_style .'"';
3956                    }
3957                    $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
3958                    // get linenumbers
3959                    // we don't merge it with the for below, since it should be better for
3960                    // memory consumption this way
3961                    // @todo: but... actually it would still be somewhat nice to merge the two loops
3962                    //        the mem peaks are at different positions
3963                    for ($i = 0; $i < $n; ++$i) {
3964                        $close = 0;
3965                        // fancy lines
3966                        if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3967                            $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3968                            // Set the attributes to style the line
3969                            if ($this->use_classes) {
3970                                $parsed_code .= '<span class="xtra li2"><span class="de2">';
3971                            } else {
3972                                // This style "covers up" the special styles set for special lines
3973                                // so that styles applied to special lines don't apply to the actual
3974                                // code on that line
3975                                $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
3976                                                  .'<span style="' . $this->code_style .'">';
3977                            }
3978                            $close += 2;
3979                        }
3980                        //Is this some line with extra styles???
3981                        if (in_array($i + 1, $this->highlight_extra_lines)) {
3982                            if ($this->use_classes) {
3983                                if (isset($this->highlight_extra_lines_styles[$i])) {
3984                                    $parsed_code .= "<span class=\"xtra lx$i\">";
3985                                } else {
3986                                    $parsed_code .= "<span class=\"xtra ln-xtra\">";
3987                                }
3988                            } else {
3989                                $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
3990                            }
3991                            ++$close;
3992                        }
3993                        $parsed_code .= $this->line_numbers_start + $i;
3994                        if ($close) {
3995                            $parsed_code .= str_repeat('</span>', $close);
3996                        } elseif ($i != $n) {
3997                            $parsed_code .= "\n";
3998                        }
3999                    }
4000                    $parsed_code .= '</pre></td><td'.$attributes.'>';
4001                }
4002                $parsed_code .= '<pre'. $attributes .'>';
4003            }
4004            // No line numbers, but still need to handle highlighting lines extra.
4005            // Have to use divs so the full width of the code is highlighted
4006            $close = 0;
4007            for ($i = 0; $i < $n; ++$i) {
4008                // Make lines have at least one space in them if they're empty
4009                // BenBE: Checking emptiness using trim instead of relying on blanks
4010                if ('' == trim($code[$i])) {
4011                    $code[$i] = '&nbsp;';
4012                }
4013                // fancy lines
4014                if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
4015                    $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
4016                    // Set the attributes to style the line
4017                    if ($this->use_classes) {
4018                        $parsed_code .= '<span class="xtra li2"><span class="de2">';
4019                    } else {
4020                        // This style "covers up" the special styles set for special lines
4021                        // so that styles applied to special lines don't apply to the actual
4022                        // code on that line
4023                        $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
4024                                          .'<span style="' . $this->code_style .'">';
4025                    }
4026                    $close += 2;
4027                }
4028                //Is this some line with extra styles???
4029                if (in_array($i + 1, $this->highlight_extra_lines)) {
4030                    if ($this->use_classes) {
4031                        if (isset($this->highlight_extra_lines_styles[$i])) {
4032                            $parsed_code .= "<span class=\"xtra lx$i\">";
4033                        } else {
4034                            $parsed_code .= "<span class=\"xtra ln-xtra\">";
4035                        }
4036                    } else {
4037                        $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
4038                    }
4039                    ++$close;
4040                }
4041
4042                $parsed_code .= $code[$i];
4043
4044                if ($close) {
4045                  $parsed_code .= str_repeat('</span>', $close);
4046                  $close = 0;
4047                }
4048                if ($i + 1 < $n) {
4049                    $parsed_code .= "\n";
4050                }
4051                unset($code[$i]);
4052            }
4053
4054            if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
4055                $parsed_code .= '</pre>';
4056            }
4057            if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4058                $parsed_code .= '</td>';
4059            }
4060        }
4061
4062        $parsed_code .= $this->footer();
4063    }
4064
4065    /**
4066     * Creates the header for the code block (with correct attributes)
4067     *
4068     * @return string The header for the code block
4069     * @since  1.0.0
4070     */
4071    protected function header() {
4072        // Get attributes needed
4073        /**
4074         * @todo   Document behaviour change - class is outputted regardless of whether
4075         *         we're using classes or not. Same with style
4076         */
4077        $attributes = ' class="' . $this->_genCSSName($this->language);
4078        if ($this->overall_class != '') {
4079            $attributes .= " ".$this->_genCSSName($this->overall_class);
4080        }
4081        $attributes .= '"';
4082
4083        if ($this->overall_id != '') {
4084            $attributes .= " id=\"{$this->overall_id}\"";
4085        }
4086        if ($this->overall_style != '' && !$this->use_classes) {
4087            $attributes .= ' style="' . $this->overall_style . '"';
4088        }
4089
4090        $ol_attributes = '';
4091
4092        if ($this->line_numbers_start != 1) {
4093            $ol_attributes .= ' start="' . $this->line_numbers_start . '"';
4094        }
4095
4096        // Get the header HTML
4097        $header = $this->header_content;
4098        if ($header) {
4099            if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
4100                $header = str_replace("\n", '', $header);
4101            }
4102            $header = $this->replace_keywords($header);
4103
4104            if ($this->use_classes) {
4105                $attr = ' class="head"';
4106            } else {
4107                $attr = " style=\"{$this->header_content_style}\"";
4108            }
4109            if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4110                $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
4111            } else {
4112                $header = "<div$attr>$header</div>";
4113            }
4114        }
4115
4116        if (GESHI_HEADER_NONE == $this->header_type) {
4117            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4118                return "$header<ol$attributes$ol_attributes>";
4119            }
4120            return $header . ($this->force_code_block ? '<div>' : '');
4121        }
4122
4123        // Work out what to return and do it
4124        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4125            if ($this->header_type == GESHI_HEADER_PRE) {
4126                return "<pre$attributes>$header<ol$ol_attributes>";
4127            } elseif ($this->header_type == GESHI_HEADER_DIV ||
4128                $this->header_type == GESHI_HEADER_PRE_VALID) {
4129                return "<div$attributes>$header<ol$ol_attributes>";
4130            } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
4131                return "<table$attributes>$header<tbody><tr class=\"li1\">";
4132            }
4133        } else {
4134            if ($this->header_type == GESHI_HEADER_PRE) {
4135                return "<pre$attributes>$header"  .
4136                    ($this->force_code_block ? '<div>' : '');
4137            } else {
4138                return "<div$attributes>$header" .
4139                    ($this->force_code_block ? '<div>' : '');
4140            }
4141        }
4142    }
4143
4144    /**
4145     * Returns the footer for the code block.
4146     *
4147     * @return string The footer for the code block
4148     * @since  1.0.0
4149     */
4150    protected function footer() {
4151        $footer = $this->footer_content;
4152        if ($footer) {
4153            if ($this->header_type == GESHI_HEADER_PRE) {
4154                $footer = str_replace("\n", '', $footer);;
4155            }
4156            $footer = $this->replace_keywords($footer);
4157
4158            if ($this->use_classes) {
4159                $attr = ' class="foot"';
4160            } else {
4161                $attr = " style=\"{$this->footer_content_style}\"";
4162            }
4163            if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4164                $footer = "<tfoot><tr><td colspan=\"2\">$footer</td></tr></tfoot>";
4165            } else {
4166                $footer = "<div$attr>$footer</div>";
4167            }
4168        }
4169
4170        if (GESHI_HEADER_NONE == $this->header_type) {
4171            return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
4172        }
4173
4174        if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
4175            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4176                return "</ol>$footer</div>";
4177            }
4178            return ($this->force_code_block ? '</div>' : '') .
4179                "$footer</div>";
4180        }
4181        elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
4182            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4183                return "</tr></tbody>$footer</table>";
4184            }
4185            return ($this->force_code_block ? '</div>' : '') .
4186                "$footer</div>";
4187        }
4188        else {
4189            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4190                return "</ol>$footer</pre>";
4191            }
4192            return ($this->force_code_block ? '</div>' : '') .
4193                "$footer</pre>";
4194        }
4195    }
4196
4197    /**
4198     * Replaces certain keywords in the header and footer with
4199     * certain configuration values
4200     *
4201     * @param  string $instr The header or footer content to do replacement on
4202     * @return string The header or footer with replaced keywords
4203     * @since  1.0.2
4204     */
4205    protected function replace_keywords($instr) {
4206        $keywords = $replacements = array();
4207
4208        $keywords[] = '<TIME>';
4209        $keywords[] = '{TIME}';
4210        $replacements[] = $replacements[] = number_format($time = $this->get_time(), 3);
4211
4212        $keywords[] = '<LANGUAGE>';
4213        $keywords[] = '{LANGUAGE}';
4214        $replacements[] = $replacements[] = $this->language_data['LANG_NAME'];
4215
4216        $keywords[] = '<VERSION>';
4217        $keywords[] = '{VERSION}';
4218        $replacements[] = $replacements[] = GESHI_VERSION;
4219
4220        $keywords[] = '<SPEED>';
4221        $keywords[] = '{SPEED}';
4222        if ($time <= 0) {
4223            $speed = 'N/A';
4224        } else {
4225            $speed = strlen($this->source) / $time;
4226            if ($speed >= 1024) {
4227                $speed = sprintf("%.2f KB/s", $speed / 1024.0);
4228            } else {
4229                $speed = sprintf("%.0f B/s", $speed);
4230            }
4231        }
4232        $replacements[] = $replacements[] = $speed;
4233
4234        return str_replace($keywords, $replacements, $instr);
4235    }
4236
4237    /**
4238     * Secure replacement for PHP built-in function htmlspecialchars().
4239     *
4240     * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
4241     * for this replacement function.
4242     *
4243     * The INTERFACE for this function is almost the same as that for
4244     * htmlspecialchars(), with the same default for quote style; however, there
4245     * is no 'charset' parameter. The reason for this is as follows:
4246     *
4247     * The PHP docs say:
4248     *      "The third argument charset defines character set used in conversion."
4249     *
4250     * I suspect PHP's htmlspecialchars() is working at the byte-value level and
4251     * thus _needs_ to know (or asssume) a character set because the special
4252     * characters to be replaced could exist at different code points in
4253     * different character sets. (If indeed htmlspecialchars() works at
4254     * byte-value level that goes some  way towards explaining why the
4255     * vulnerability would exist in this function, too, and not only in
4256     * htmlentities() which certainly is working at byte-value level.)
4257     *
4258     * This replacement function however works at character level and should
4259     * therefore be "immune" to character set differences - so no charset
4260     * parameter is needed or provided. If a third parameter is passed, it will
4261     * be silently ignored.
4262     *
4263     * In the OUTPUT there is a minor difference in that we use '&#39;' instead
4264     * of PHP's '&#039;' for a single quote: this provides compatibility with
4265     *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
4266     * (see comment by mikiwoz at yahoo dot co dot uk on
4267     * http://php.net/htmlspecialchars); it also matches the entity definition
4268     * for XML 1.0
4269     * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
4270     * Like PHP we use a numeric character reference instead of '&apos;' for the
4271     * single quote. For the other special characters we use the named entity
4272     * references, as PHP is doing.
4273     *
4274     * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
4275     *
4276     * @license     http://www.gnu.org/copyleft/lgpl.html
4277     *              GNU Lesser General Public License
4278     * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
4279     *              Wikka Development Team}
4280     *
4281     * @param       string  $string string to be converted
4282     * @param       integer $quote_style
4283     *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
4284     *                      - ENT_NOQUOTES: escapes only &, < and >
4285     *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
4286     * @return      string  converted string
4287     * @since       1.0.7.18
4288     */
4289    protected function hsc($string, $quote_style = ENT_COMPAT) {
4290        // init
4291        static $aTransSpecchar = array(
4292            '&' => '&amp;',
4293            '"' => '&quot;',
4294            '<' => '&lt;',
4295            '>' => '&gt;',
4296
4297            //This fix is related to SF#1923020, but has to be applied
4298            //regardless of actually highlighting symbols.
4299
4300            //Circumvent a bug with symbol highlighting
4301            //This is required as ; would produce undesirable side-effects if it
4302            //was not to be processed as an entity.
4303            ';' => '<SEMI>', // Force ; to be processed as entity
4304            '|' => '<PIPE>' // Force | to be processed as entity
4305            );                      // ENT_COMPAT set
4306
4307        switch ($quote_style) {
4308            case ENT_NOQUOTES: // don't convert double quotes
4309                unset($aTransSpecchar['"']);
4310                break;
4311            case ENT_QUOTES: // convert single quotes as well
4312                $aTransSpecchar["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
4313                break;
4314        }
4315
4316        // return translated string
4317        return strtr($string, $aTransSpecchar);
4318    }
4319
4320    /**
4321     * Generate a CSS class name from a given string.
4322     * Prevents invalid CSS classes.
4323     *
4324     * @param string $name Proposed class name
4325     *
4326     * @return string Safe CSS class name
4327     */
4328    protected function _genCSSName($name) {
4329        return (is_numeric($name[0]) ? '_' : '') . $name;
4330    }
4331
4332    /**
4333     * Returns a stylesheet for the highlighted code. If $economy mode
4334     * is true, we only return the stylesheet declarations that matter for
4335     * this code block instead of the whole thing
4336     *
4337     * @param  boolean $economy_mode Whether to use economy mode or not
4338     * @return string A stylesheet built on the data for the current language
4339     * @since  1.0.0
4340     */
4341    public function get_stylesheet($economy_mode = true) {
4342        // If there's an error, chances are that the language file
4343        // won't have populated the language data file, so we can't
4344        // risk getting a stylesheet...
4345        if ($this->error) {
4346            return '';
4347        }
4348
4349        //Check if the style rearrangements have been processed ...
4350        //This also does some preprocessing to check which style groups are useable ...
4351        if(!isset($this->language_data['NUMBERS_CACHE'])) {
4352            $this->build_style_cache();
4353        }
4354
4355        // First, work out what the selector should be. If there's an ID,
4356        // that should be used, the same for a class. Otherwise, a selector
4357        // of '' means that these styles will be applied anywhere
4358        if ($this->overall_id) {
4359            $selector = '#' . $this->_genCSSName($this->overall_id);
4360        } else {
4361            $selector = '.' . $this->_genCSSName($this->language);
4362            if ($this->overall_class) {
4363                $selector .= '.' . $this->_genCSSName($this->overall_class);
4364            }
4365        }
4366        $selector .= ' ';
4367
4368        // Header of the stylesheet
4369        if (!$economy_mode) {
4370            $stylesheet = "/**\n".
4371                " * GeSHi Dynamically Generated Stylesheet\n".
4372                " * --------------------------------------\n".
4373                " * Dynamically generated stylesheet for {$this->language}\n".
4374                " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
4375                " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2014 Benny Baumann\n" .
4376                " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4377                " * --------------------------------------\n".
4378                " */\n";
4379        } else {
4380            $stylesheet = "/**\n".
4381                " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2014 Benny Baumann\n" .
4382                " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4383                " */\n";
4384        }
4385
4386        // Set the <ol> to have no effect at all if there are line numbers
4387        // (<ol>s have margins that should be destroyed so all layout is
4388        // controlled by the set_overall_style method, which works on the
4389        // <pre> or <div> container). Additionally, set default styles for lines
4390        if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4391            //$stylesheet .= "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
4392            $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
4393        }
4394
4395        // Add overall styles
4396        // note: neglect economy_mode, empty styles are meaningless
4397        if ($this->overall_style != '') {
4398            $stylesheet .= "$selector {{$this->overall_style}}\n";
4399        }
4400
4401        // Add styles for links
4402        // note: economy mode does not make _any_ sense here
4403        //       either the style is empty and thus no selector is needed
4404        //       or the appropriate key is given.
4405        foreach ($this->link_styles as $key => $style) {
4406            if ($style != '') {
4407                switch ($key) {
4408                    case GESHI_LINK:
4409                        $stylesheet .= "{$selector}a:link {{$style}}\n";
4410                        break;
4411                    case GESHI_HOVER:
4412                        $stylesheet .= "{$selector}a:hover {{$style}}\n";
4413                        break;
4414                    case GESHI_ACTIVE:
4415                        $stylesheet .= "{$selector}a:active {{$style}}\n";
4416                        break;
4417                    case GESHI_VISITED:
4418                        $stylesheet .= "{$selector}a:visited {{$style}}\n";
4419                        break;
4420                }
4421            }
4422        }
4423
4424        // Header and footer
4425        // note: neglect economy_mode, empty styles are meaningless
4426        if ($this->header_content_style != '') {
4427            $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
4428        }
4429        if ($this->footer_content_style != '') {
4430            $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
4431        }
4432
4433        // Styles for important stuff
4434        // note: neglect economy_mode, empty styles are meaningless
4435        if ($this->important_styles != '') {
4436            $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
4437        }
4438
4439        // Simple line number styles
4440        if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->line_style1 != '') {
4441            $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
4442        }
4443        if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->table_linenumber_style != '') {
4444            $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
4445        }
4446        // If there is a style set for fancy line numbers, echo it out
4447        if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
4448            $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
4449        }
4450
4451        // note: empty styles are meaningless
4452        foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
4453            if ($styles != '' && (!$economy_mode ||
4454                (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
4455                $this->lexic_permissions['KEYWORDS'][$group]))) {
4456                $stylesheet .= "$selector.kw$group {{$styles}}\n";
4457            }
4458        }
4459        foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
4460            if ($styles != '' && (!$economy_mode ||
4461                (isset($this->lexic_permissions['COMMENTS'][$group]) &&
4462                $this->lexic_permissions['COMMENTS'][$group]) ||
4463                (!empty($this->language_data['COMMENT_REGEXP']) &&
4464                !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
4465                $stylesheet .= "$selector.co$group {{$styles}}\n";
4466            }
4467        }
4468        foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
4469            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['ESCAPE_CHAR'])) {
4470                // NEW: since 1.0.8 we have to handle hardescapes
4471                if ($group === 'HARD') {
4472                    $group = '_h';
4473                }
4474                $stylesheet .= "$selector.es$group {{$styles}}\n";
4475            }
4476        }
4477        foreach ($this->language_data['STYLES']['BRACKETS'] as $group => $styles) {
4478            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['BRACKETS'])) {
4479                $stylesheet .= "$selector.br$group {{$styles}}\n";
4480            }
4481        }
4482        foreach ($this->language_data['STYLES']['SYMBOLS'] as $group => $styles) {
4483            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['SYMBOLS'])) {
4484                $stylesheet .= "$selector.sy$group {{$styles}}\n";
4485            }
4486        }
4487        foreach ($this->language_data['STYLES']['STRINGS'] as $group => $styles) {
4488            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['STRINGS'])) {
4489                // NEW: since 1.0.8 we have to handle hardquotes
4490                if ($group === 'HARD') {
4491                    $group = '_h';
4492                }
4493                $stylesheet .= "$selector.st$group {{$styles}}\n";
4494            }
4495        }
4496        foreach ($this->language_data['STYLES']['NUMBERS'] as $group => $styles) {
4497            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['NUMBERS'])) {
4498                $stylesheet .= "$selector.nu$group {{$styles}}\n";
4499            }
4500        }
4501        foreach ($this->language_data['STYLES']['METHODS'] as $group => $styles) {
4502            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['METHODS'])) {
4503                $stylesheet .= "$selector.me$group {{$styles}}\n";
4504            }
4505        }
4506        // note: neglect economy_mode, empty styles are meaningless
4507        foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
4508            if ($styles != '') {
4509                $stylesheet .= "$selector.sc$group {{$styles}}\n";
4510            }
4511        }
4512        foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
4513            if ($styles != '' && (!$economy_mode ||
4514                (isset($this->lexic_permissions['REGEXPS'][$group]) &&
4515                $this->lexic_permissions['REGEXPS'][$group]))) {
4516                if (is_array($this->language_data['REGEXPS'][$group]) &&
4517                    array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
4518                    $stylesheet .= "$selector.";
4519                    $stylesheet .= $this->language_data['REGEXPS'][$group][GESHI_CLASS];
4520                    $stylesheet .= " {{$styles}}\n";
4521                } else {
4522                    $stylesheet .= "$selector.re$group {{$styles}}\n";
4523                }
4524            }
4525        }
4526        // Styles for lines being highlighted extra
4527        if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
4528            $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
4529        }
4530        $stylesheet .= "{$selector}span.xtra { display:block; }\n";
4531        foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
4532            $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
4533        }
4534
4535        return $stylesheet;
4536    }
4537
4538    /**
4539     * Get's the style that is used for the specified line
4540     *
4541     * @param int $line The line number information is requested for
4542     * @since 1.0.7.21
4543     */
4544    protected function get_line_style($line) {
4545        $style = null;
4546        if (isset($this->highlight_extra_lines_styles[$line])) {
4547            $style = $this->highlight_extra_lines_styles[$line];
4548        } else { // if no "extra" style assigned
4549            $style = $this->highlight_extra_lines_style;
4550        }
4551
4552        return $style;
4553    }
4554
    /**
     * Creates an optimized list of regular expression alternations from an
     * array of strings.
     *
     * The (sorted) strings are inserted into a nested token array in which
     * common leading parts are shared; that array is then turned into a
     * pattern by _optimize_regexp_list_tokens_to_string(). The result is an
     * array of patterns rather than a single one: a new element is started
     * when the collected length exceeds GESHI_MAX_PCRE_LENGTH or when the
     * number of "(?:" groups would exceed GESHI_MAX_PCRE_SUBPATTERNS.
     *
     * Example (provided none of those limits is hit):
     * <code>$list = array('faa', 'foo', 'foobar');
     *          => array('f(?:aa|oo(?:bar)?)')</code>
     *
     * @param array  $list             array of (unquoted) strings
     * @param string $regexp_delimiter your regular expression delimiter, @see preg_quote()
     * @return array list of strings, each usable as an alternation inside a regular expression
     * @author Milian Wolff <mail@milianw.de>
     * @since 1.0.8
     */
    protected function optimize_regexp_list($list, $regexp_delimiter = '/') {
        // characters a split-off key part must not start with (see below)
        $regex_chars = array('.', '\\', '+', '-', '*', '?', '[', '^', ']', '$',
            '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
        // the prefix matching below only ever compares an entry with the
        // previously inserted one, hence the sorting
        sort($list);
        // result list; $list_key is the index of the element currently being filled
        $regexp_list = array('');
        // number of '(?:' groups already contained in $regexp_list[$list_key]
        $num_subpatterns = 0;
        $list_key = 0;

        // the tokens which we will use to generate the regexp list
        $tokens = array();
        // key parts of the previously inserted entry, indexed by nesting level
        $prev_keys = array();
        // go through all entries of the list and generate the token list
        // $cur_len sums up the length of the key parts added to $tokens
        $cur_len = 0;
        for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
            if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
                // seems like the length of this pcre is growing exorbitantly
                // -> convert what has been collected into a new list element and start over
                $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
                $tokens = array();
                $cur_len = 0;
            }
            $level = 0;
            $entry = preg_quote((string) $list[$i], $regexp_delimiter);
            // $pointer references the sub-array of $tokens the entry is inserted into
            $pointer = &$tokens;
            // properly assign the new entry to the correct position in the token array
            // possibly generate smaller common denominator keys
            while (true) {
                // get the common denominator
                if (isset($prev_keys[$level])) {
                    if ($prev_keys[$level] == $entry) {
                        // this is a duplicate entry, skip it
                        continue 2;
                    }
                    // count the leading characters shared with the previous key of this level
                    $char = 0;
                    while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                            && $entry[$char] == $prev_keys[$level][$char]) {
                        ++$char;
                    }
                    if ($char > 0) {
                        // this entry has at least some chars in common with the current key
                        if ($char == strlen($prev_keys[$level])) {
                            // current key is totally matched, i.e. this entry has just some bits appended
                            $pointer = &$pointer[$prev_keys[$level]];
                        } else {
                            // only part of the keys match
                            $new_key_part1 = substr($prev_keys[$level], 0, $char);
                            $new_key_part2 = substr($prev_keys[$level], $char);

                            if (in_array($new_key_part1[0], $regex_chars)
                                || in_array($new_key_part2[0], $regex_chars)) {
                                // this is bad, a regex char as first character
                                // -> do not split the key, store the entry as a whole instead;
                                //    the next pass of the loop then finds $prev_keys[$level]
                                //    equal to $entry and moves on via "continue 2"
                                $pointer[$entry] = array('' => true);
                                array_splice($prev_keys, $level, count($prev_keys), $entry);
                                $cur_len += strlen($entry);
                                continue;
                            } else {
                                // relocate previous tokens
                                $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                                unset($pointer[$prev_keys[$level]]);
                                $pointer = &$pointer[$new_key_part1];
                                // recreate key index
                                array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                                $cur_len += strlen($new_key_part2);
                            }
                        }
                        // descend one level and go on with the remainder of the entry
                        ++$level;
                        $entry = substr($entry, $char);
                        continue;
                    }
                    // else: fall through, i.e. no common denominator was found
                }
                if ($level == 0 && !empty($tokens)) {
                    // we can dump current tokens into the string and throw them away afterwards
                    $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                    $new_subpatterns = substr_count($new_entry, '(?:');
                    if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                        // too many groups for the current element -> start a new one
                        $regexp_list[++$list_key] = $new_entry;
                        $num_subpatterns = $new_subpatterns;
                    } else {
                        // append to the current element as a further alternative
                        if (!empty($regexp_list[$list_key])) {
                            $new_entry = '|' . $new_entry;
                        }
                        $regexp_list[$list_key] .= $new_entry;
                        $num_subpatterns += $new_subpatterns;
                    }
                    $tokens = array();
                    $cur_len = 0;
                }
                // no further common denominator found
                // ('' => true marks that an entry ends at this token)
                $pointer[$entry] = array('' => true);
                array_splice($prev_keys, $level, count($prev_keys), $entry);

                $cur_len += strlen($entry);
                break;
            }
            // the entry has been processed and is not needed any longer
            unset($list[$i]);
        }
        // make sure the last tokens get converted as well
        $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
        if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
            // only move on to a new element if the current one holds something
            if ( !empty($regexp_list[$list_key]) ) {
              ++$list_key;
            }
            $regexp_list[$list_key] = $new_entry;
        } else {
            if (!empty($regexp_list[$list_key])) {
                $new_entry = '|' . $new_entry;
            }
            $regexp_list[$list_key] .= $new_entry;
        }
        return $regexp_list;
    }
4681
4682    /**
4683    * this function creates the appropriate regexp string of an token array
4684    * you should not call this function directly, @see $this->optimize_regexp_list().
4685    *
4686    * @param array $tokens   array of tokens
4687    * @param bool  $recursed to know wether we recursed or not
4688    * @return string
4689    * @author Milian Wolff <mail@milianw.de>
4690    * @since 1.0.8
4691    */
4692    protected function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
4693        $list = '';
4694        foreach ($tokens as $token => $sub_tokens) {
4695            $list .= $token;
4696            $close_entry = isset($sub_tokens['']);
4697            unset($sub_tokens['']);
4698            if (!empty($sub_tokens)) {
4699                $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
4700                if ($close_entry) {
4701                    // make sub_tokens optional
4702                    $list .= '?';
4703                }
4704            }
4705            $list .= '|';
4706        }
4707        if (!$recursed) {
4708            // do some optimizations
4709            // common trailing strings
4710            // BUGGY!
4711            //$list = preg_replace_callback('#(?<=^|\:|\|)\w+?(\w+)(?:\|.+\1)+(?=\|)#', create_function(
4712            //    '$matches', 'return "(?:" . preg_replace("#" . preg_quote($matches[1], "#") . "(?=\||$)#", "", $matches[0]) . ")" . $matches[1];'), $list);
4713            // (?:p)? => p?
4714            $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
4715            // (?:a|b|c|d|...)? => [abcd...]?
4716            // TODO: a|bb|c => [ac]|bb
4717            static $callback_2;
4718            if (!isset($callback_2)) {
4719                $callback_2 = function($matches) {
4720                    return "[" . str_replace("|", "", $matches[1]) . "]";
4721                };
4722            }
4723            $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#', $callback_2, $list);
4724        }
4725        // return $list without trailing pipe
4726        return substr($list, 0, -1);
4727    }
4728} // End Class GeSHi
4729
4730
if (!function_exists('geshi_highlight')) {
    /**
     * Convenience wrapper to highlight a piece of code in one call, in the
     * spirit of highlight_string(). The highlighted code is produced without
     * a surrounding header element and wrapped in <code> tags.
     *
     * @param string $string   The code to highlight
     * @param string $language The language to highlight the code in
     * @param string $path     The path to the language files. This can normally be left out:
     *                         as from version 1.0.7 the path should be automatically detected
     * @param boolean $return  Whether to return the result or to echo
     * @return string|boolean The code highlighted if $return is true; otherwise the
     *                        code is echoed and false is returned if an error was
     *                        reported, true if not
     * @since 1.0.2
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        $html = '<code>' . $highlighter->parse_code() . '</code>';
        if ($return) {
            return $html;
        }

        // Output mode: print the markup and report success or failure
        echo $html;
        return !$highlighter->error();
    }
}
4759