1
/**
 * Build-time flag, initialised to {@code false}.
 * NOTE(review): the code that reads this flag is outside this excerpt;
 * presumably it decides whether the API is exported into the global scope.
 * Confirm against the end of the file.
 */
var IN_GLOBAL_SCOPE = false;
3
4/**
5 * @license
6 * Copyright (C) 2006 Google Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21/**
22 * @fileoverview
23 * some functions for browser-side pretty printing of code contained in html.
24 *
25 * <p>
26 * For a fairly comprehensive set of languages see the
27 * <a href="https://github.com/google/code-prettify#for-which-languages-does-it-work">README</a>
28 * file that came with this source.  At a minimum, the lexer should work on a
29 * number of languages including C and friends, Java, Python, Bash, SQL, HTML,
30 * XML, CSS, Javascript, and Makefiles.  It works passably on Ruby, PHP and Awk
31 * and a subset of Perl, but, because of commenting conventions, doesn't work on
32 * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class.
33 * <p>
34 * Usage: <ol>
35 * <li> include this source file in an html page via
36 *   {@code <script type="text/javascript" src="/path/to/prettify.js"></script>}
37 * <li> define style rules.  See the example page for examples.
38 * <li> mark the {@code <pre>} and {@code <code>} tags in your source with
39 *    {@code class=prettyprint.}
40 *    You can also use the (html deprecated) {@code <xmp>} tag, but the pretty
41 *    printer needs to do more substantial DOM manipulations to support that, so
42 *    some css styles may not be preserved.
43 * </ol>
44 * That's it.  I wanted to keep the API as simple as possible, so there's no
45 * need to specify which language the code is in, but if you wish, you can add
46 * another class to the {@code <pre>} or {@code <code>} element to specify the
47 * language, as in {@code <pre class="prettyprint lang-java">}.  Any class that
48 * starts with "lang-" followed by a file extension, specifies the file type.
49 * See the "lang-*.js" files in this directory for code that implements
50 * per-language file handlers.
51 * <p>
52 * Change log:<br>
53 * cbeust, 2006/08/22
54 * <blockquote>
55 *   Java annotations (start with "@") are now captured as literals ("lit")
56 * </blockquote>
57 * @requires console
58 */
59
60// JSLint declarations
61/*global console, document, navigator, setTimeout, window, define */
62
63
/**
 * Namespace object for the public API.  Declared here and left undefined;
 * it is populated later in the file.
 *
 * {@type !{
 *   'createSimpleLexer': function (Array, Array): (function (JobT)),
 *   'registerLangHandler': function (function (JobT), Array.<string>),
 *   'PR_ATTRIB_NAME': string,
 *   'PR_ATTRIB_VALUE': string,
 *   'PR_COMMENT': string,
 *   'PR_DECLARATION': string,
 *   'PR_KEYWORD': string,
 *   'PR_LITERAL': string,
 *   'PR_NOCODE': string,
 *   'PR_PLAIN': string,
 *   'PR_PUNCTUATION': string,
 *   'PR_SOURCE': string,
 *   'PR_STRING': string,
 *   'PR_TAG': string,
 *   'PR_TYPE': string,
 *   'prettyPrintOne': function (string, string, number|boolean),
 *   'prettyPrint': function (?function, ?(HTMLElement|HTMLDocument))
 * }}
 * @const
 */
var PR;
88
/**
 * Split {@code prettyPrint} into multiple timeouts so as not to interfere with
 * UI events.
 * If set to {@code false}, {@code prettyPrint()} is synchronous.
 */
var PR_SHOULD_USE_CONTINUATION = true;
// Mirror the flag onto window only when a window object exists, so that
// evaluating this file outside a browser does not throw a ReferenceError.
if (typeof window !== 'undefined') {
  window['PR_SHOULD_USE_CONTINUATION'] = PR_SHOULD_USE_CONTINUATION;
}
98
/**
 * Pretty print a chunk of code.
 * Declared here and left undefined; the implementation is assigned later in
 * the file.
 *
 * @param {string} sourceCodeHtml The HTML to pretty print.
 * @param {string=} opt_langExtension The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param {(number|boolean)=} opt_numberLines True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 * @return {string} code as html, but prettier
 */
var prettyPrintOne;
/**
 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 * Declared here and left undefined; the implementation is assigned later in
 * the file.
 *
 * @param {Function=} opt_whenDone called when prettifying is done.
 * @param {(HTMLElement|HTMLDocument)=} opt_root an element or document
 *   containing all the elements to pretty print.
 *   Defaults to {@code document.body}.
 */
var prettyPrint;
119
120
121(function () {
122  var win = (typeof window !== 'undefined') ? window : {};
123  // Keyword lists for various languages.
124  // We use things that coerce to strings to make them compact when minified
125  // and to defeat aggressive optimizers that fold large string constants.
126  var FLOW_CONTROL_KEYWORDS = ["break,continue,do,else,for,if,return,while"];
127  var C_KEYWORDS = [FLOW_CONTROL_KEYWORDS,"auto,case,char,const,default," +
128      "double,enum,extern,float,goto,inline,int,long,register,restrict,short,signed," +
129      "sizeof,static,struct,switch,typedef,union,unsigned,void,volatile"];
130  var COMMON_KEYWORDS = [C_KEYWORDS,"catch,class,delete,false,import," +
131      "new,operator,private,protected,public,this,throw,true,try,typeof"];
132  var CPP_KEYWORDS = [COMMON_KEYWORDS,"alignas,alignof,align_union,asm,axiom,bool," +
133      "concept,concept_map,const_cast,constexpr,decltype,delegate," +
134      "dynamic_cast,explicit,export,friend,generic,late_check," +
135      "mutable,namespace,noexcept,noreturn,nullptr,property,reinterpret_cast,static_assert," +
136      "static_cast,template,typeid,typename,using,virtual,where"];
137  var JAVA_KEYWORDS = [COMMON_KEYWORDS,
138      "abstract,assert,boolean,byte,extends,finally,final,implements,import," +
139      "instanceof,interface,null,native,package,strictfp,super,synchronized," +
140      "throws,transient"];
141  var CSHARP_KEYWORDS = [COMMON_KEYWORDS,
142      "abstract,add,alias,as,ascending,async,await,base,bool,by,byte,checked,decimal,delegate,descending," +
143      "dynamic,event,finally,fixed,foreach,from,get,global,group,implicit,in,interface," +
144      "internal,into,is,join,let,lock,null,object,out,override,orderby,params," +
145      "partial,readonly,ref,remove,sbyte,sealed,select,set,stackalloc,string,select,uint,ulong," +
146      "unchecked,unsafe,ushort,value,var,virtual,where,yield"];
147  var COFFEE_KEYWORDS = "all,and,by,catch,class,else,extends,false,finally," +
148      "for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then," +
149      "throw,true,try,unless,until,when,while,yes";
150  var JSCRIPT_KEYWORDS = [COMMON_KEYWORDS,
151      "abstract,async,await,constructor,debugger,enum,eval,export,from,function," +
152      "get,import,implements,instanceof,interface,let,null,of,set,undefined," +
153      "var,with,yield,Infinity,NaN"];
154  var PERL_KEYWORDS = "caller,delete,die,do,dump,elsif,eval,exit,foreach,for," +
155      "goto,if,import,last,local,my,next,no,our,print,package,redo,require," +
156      "sub,undef,unless,until,use,wantarray,while,BEGIN,END";
157  var PYTHON_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "and,as,assert,class,def,del," +
158      "elif,except,exec,finally,from,global,import,in,is,lambda," +
159      "nonlocal,not,or,pass,print,raise,try,with,yield," +
160      "False,True,None"];
161  var RUBY_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "alias,and,begin,case,class," +
162      "def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo," +
163      "rescue,retry,self,super,then,true,undef,unless,until,when,yield," +
164      "BEGIN,END"];
165  var SH_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "case,done,elif,esac,eval,fi," +
166      "function,in,local,set,then,until"];
167  var ALL_KEYWORDS = [
168      CPP_KEYWORDS, CSHARP_KEYWORDS, JAVA_KEYWORDS, JSCRIPT_KEYWORDS,
169      PERL_KEYWORDS, PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS];
170  var C_TYPES = /^(DIR|FILE|array|vector|(de|priority_)?queue|(forward_)?list|stack|(const_)?(reverse_)?iterator|(unordered_)?(multi)?(set|map)|bitset|u?(int|float)\d*)\b/;
171
172  // token style names.  correspond to css classes
173  /**
174   * token style for a string literal
175   * @const
176   */
177  var PR_STRING = 'str';
178  /**
179   * token style for a keyword
180   * @const
181   */
182  var PR_KEYWORD = 'kwd';
183  /**
184   * token style for a comment
185   * @const
186   */
187  var PR_COMMENT = 'com';
188  /**
189   * token style for a type
190   * @const
191   */
192  var PR_TYPE = 'typ';
193  /**
194   * token style for a literal value.  e.g. 1, null, true.
195   * @const
196   */
197  var PR_LITERAL = 'lit';
198  /**
199   * token style for a punctuation string.
200   * @const
201   */
202  var PR_PUNCTUATION = 'pun';
203  /**
204   * token style for plain text.
205   * @const
206   */
207  var PR_PLAIN = 'pln';
208
209  /**
210   * token style for an sgml tag.
211   * @const
212   */
213  var PR_TAG = 'tag';
214  /**
215   * token style for a markup declaration such as a DOCTYPE.
216   * @const
217   */
218  var PR_DECLARATION = 'dec';
219  /**
220   * token style for embedded source.
221   * @const
222   */
223  var PR_SOURCE = 'src';
224  /**
225   * token style for an sgml attribute name.
226   * @const
227   */
228  var PR_ATTRIB_NAME = 'atn';
229  /**
230   * token style for an sgml attribute value.
231   * @const
232   */
233  var PR_ATTRIB_VALUE = 'atv';
234
235  /**
236   * A class that indicates a section of markup that is not code, e.g. to allow
237   * embedding of line numbers within code listings.
238   * @const
239   */
240  var PR_NOCODE = 'nocode';
241
242
243  // Regex pattern below is automatically generated by regexpPrecederPatterns.pl
244  // Do not modify, your changes will be erased.
245
246  // CAVEAT: this does not properly handle the case where a regular
247  // expression immediately follows another since a regular expression may
248  // have flags for case-sensitivity and the like.  Having regexp tokens
249  // adjacent is not valid in any language I'm aware of, so I'm punting.
250  // TODO: maybe style special characters inside a regexp as punctuation.
251
252  /**
253   * A set of tokens that can precede a regular expression literal in
254   * javascript
255   * http://web.archive.org/web/20070717142515/http://www.mozilla.org/js/language/js20/rationale/syntax.html
256   * has the full list, but I've removed ones that might be problematic when
257   * seen in languages that don't support regular expression literals.
258   *
259   * Specifically, I've removed any keywords that can't precede a regexp
260   * literal in a syntactically legal javascript program, and I've removed the
261   * "in" keyword since it's not a keyword in many languages, and might be used
262   * as a count of inches.
263   *
264   * The link above does not accurately describe EcmaScript rules since
265   * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
266   * very well in practice.
267   *
268   * @private
269   * @const
270   */
271  var REGEXP_PRECEDER_PATTERN = '(?:^^\\.?|[+-]|[!=]=?=?|\\#|%=?|&&?=?|\\(|\\*=?|[+\\-]=|->|\\/=?|::?|<<?=?|>>?>?=?|,|;|\\?|@|\\[|~|{|\\^\\^?=?|\\|\\|?=?|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*';
272
273
274  /**
275   * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
276   * matches the union of the sets of strings matched by the input RegExp.
277   * Since it matches globally, if the input strings have a start-of-input
278   * anchor (/^.../), it is ignored for the purposes of unioning.
279   * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
280   * @return {RegExp} a global regex.
281   */
282  function combinePrefixPatterns(regexs) {
283    var capturedGroupIndex = 0;
284
285    var needToFoldCase = false;
286    var ignoreCase = false;
287    for (var i = 0, n = regexs.length; i < n; ++i) {
288      var regex = regexs[i];
289      if (regex.ignoreCase) {
290        ignoreCase = true;
291      } else if (/[a-z]/i.test(regex.source.replace(
292                     /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
293        needToFoldCase = true;
294        ignoreCase = false;
295        break;
296      }
297    }
298
299    var escapeCharToCodeUnit = {
300      'b': 8,
301      't': 9,
302      'n': 0xa,
303      'v': 0xb,
304      'f': 0xc,
305      'r': 0xd
306    };
307
308    function decodeEscape(charsetPart) {
309      var cc0 = charsetPart.charCodeAt(0);
310      if (cc0 !== 92 /* \\ */) {
311        return cc0;
312      }
313      var c1 = charsetPart.charAt(1);
314      cc0 = escapeCharToCodeUnit[c1];
315      if (cc0) {
316        return cc0;
317      } else if ('0' <= c1 && c1 <= '7') {
318        return parseInt(charsetPart.substring(1), 8);
319      } else if (c1 === 'u' || c1 === 'x') {
320        return parseInt(charsetPart.substring(2), 16);
321      } else {
322        return charsetPart.charCodeAt(1);
323      }
324    }
325
326    function encodeEscape(charCode) {
327      if (charCode < 0x20) {
328        return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
329      }
330      var ch = String.fromCharCode(charCode);
331      return (ch === '\\' || ch === '-' || ch === ']' || ch === '^')
332          ? "\\" + ch : ch;
333    }
334
335    function caseFoldCharset(charSet) {
336      var charsetParts = charSet.substring(1, charSet.length - 1).match(
337          new RegExp(
338              '\\\\u[0-9A-Fa-f]{4}'
339              + '|\\\\x[0-9A-Fa-f]{2}'
340              + '|\\\\[0-3][0-7]{0,2}'
341              + '|\\\\[0-7]{1,2}'
342              + '|\\\\[\\s\\S]'
343              + '|-'
344              + '|[^-\\\\]',
345              'g'));
346      var ranges = [];
347      var inverse = charsetParts[0] === '^';
348
349      var out = ['['];
350      if (inverse) { out.push('^'); }
351
352      for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
353        var p = charsetParts[i];
354        if (/\\[bdsw]/i.test(p)) {  // Don't muck with named groups.
355          out.push(p);
356        } else {
357          var start = decodeEscape(p);
358          var end;
359          if (i + 2 < n && '-' === charsetParts[i + 1]) {
360            end = decodeEscape(charsetParts[i + 2]);
361            i += 2;
362          } else {
363            end = start;
364          }
365          ranges.push([start, end]);
366          // If the range might intersect letters, then expand it.
367          // This case handling is too simplistic.
368          // It does not deal with non-latin case folding.
369          // It works for latin source code identifiers though.
370          if (!(end < 65 || start > 122)) {
371            if (!(end < 65 || start > 90)) {
372              ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
373            }
374            if (!(end < 97 || start > 122)) {
375              ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
376            }
377          }
378        }
379      }
380
381      // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
382      // -> [[1, 12], [14, 14], [16, 17]]
383      ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1]  - a[1]); });
384      var consolidatedRanges = [];
385      var lastRange = [];
386      for (var i = 0; i < ranges.length; ++i) {
387        var range = ranges[i];
388        if (range[0] <= lastRange[1] + 1) {
389          lastRange[1] = Math.max(lastRange[1], range[1]);
390        } else {
391          consolidatedRanges.push(lastRange = range);
392        }
393      }
394
395      for (var i = 0; i < consolidatedRanges.length; ++i) {
396        var range = consolidatedRanges[i];
397        out.push(encodeEscape(range[0]));
398        if (range[1] > range[0]) {
399          if (range[1] + 1 > range[0]) { out.push('-'); }
400          out.push(encodeEscape(range[1]));
401        }
402      }
403      out.push(']');
404      return out.join('');
405    }
406
407    function allowAnywhereFoldCaseAndRenumberGroups(regex) {
408      // Split into character sets, escape sequences, punctuation strings
409      // like ('(', '(?:', ')', '^'), and runs of characters that do not
410      // include any of the above.
411      var parts = regex.source.match(
412          new RegExp(
413              '(?:'
414              + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
415              + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
416              + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
417              + '|\\\\[0-9]+'  // a back-reference or octal escape
418              + '|\\\\[^ux0-9]'  // other escape sequence
419              + '|\\(\\?[:!=]'  // start of a non-capturing group
420              + '|[\\(\\)\\^]'  // start/end of a group, or line start
421              + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
422              + ')',
423              'g'));
424      var n = parts.length;
425
426      // Maps captured group numbers to the number they will occupy in
427      // the output or to -1 if that has not been determined, or to
428      // undefined if they need not be capturing in the output.
429      var capturedGroups = [];
430
431      // Walk over and identify back references to build the capturedGroups
432      // mapping.
433      for (var i = 0, groupIndex = 0; i < n; ++i) {
434        var p = parts[i];
435        if (p === '(') {
436          // groups are 1-indexed, so max group index is count of '('
437          ++groupIndex;
438        } else if ('\\' === p.charAt(0)) {
439          var decimalValue = +p.substring(1);
440          if (decimalValue) {
441            if (decimalValue <= groupIndex) {
442              capturedGroups[decimalValue] = -1;
443            } else {
444              // Replace with an unambiguous escape sequence so that
445              // an octal escape sequence does not turn into a backreference
446              // to a capturing group from an earlier regex.
447              parts[i] = encodeEscape(decimalValue);
448            }
449          }
450        }
451      }
452
453      // Renumber groups and reduce capturing groups to non-capturing groups
454      // where possible.
455      for (var i = 1; i < capturedGroups.length; ++i) {
456        if (-1 === capturedGroups[i]) {
457          capturedGroups[i] = ++capturedGroupIndex;
458        }
459      }
460      for (var i = 0, groupIndex = 0; i < n; ++i) {
461        var p = parts[i];
462        if (p === '(') {
463          ++groupIndex;
464          if (!capturedGroups[groupIndex]) {
465            parts[i] = '(?:';
466          }
467        } else if ('\\' === p.charAt(0)) {
468          var decimalValue = +p.substring(1);
469          if (decimalValue && decimalValue <= groupIndex) {
470            parts[i] = '\\' + capturedGroups[decimalValue];
471          }
472        }
473      }
474
475      // Remove any prefix anchors so that the output will match anywhere.
476      // ^^ really does mean an anchored match though.
477      for (var i = 0; i < n; ++i) {
478        if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
479      }
480
481      // Expand letters to groups to handle mixing of case-sensitive and
482      // case-insensitive patterns if necessary.
483      if (regex.ignoreCase && needToFoldCase) {
484        for (var i = 0; i < n; ++i) {
485          var p = parts[i];
486          var ch0 = p.charAt(0);
487          if (p.length >= 2 && ch0 === '[') {
488            parts[i] = caseFoldCharset(p);
489          } else if (ch0 !== '\\') {
490            // TODO: handle letters in numeric escapes.
491            parts[i] = p.replace(
492                /[a-zA-Z]/g,
493                function (ch) {
494                  var cc = ch.charCodeAt(0);
495                  return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
496                });
497          }
498        }
499      }
500
501      return parts.join('');
502    }
503
504    var rewritten = [];
505    for (var i = 0, n = regexs.length; i < n; ++i) {
506      var regex = regexs[i];
507      if (regex.global || regex.multiline) { throw new Error('' + regex); }
508      rewritten.push(
509          '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
510    }
511
512    return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
513  }
514
515
516  /**
517   * Split markup into a string of source code and an array mapping ranges in
518   * that string to the text nodes in which they appear.
519   *
520   * <p>
521   * The HTML DOM structure:</p>
522   * <pre>
523   * (Element   "p"
524   *   (Element "b"
525   *     (Text  "print "))       ; #1
526   *   (Text    "'Hello '")      ; #2
527   *   (Element "br")            ; #3
528   *   (Text    "  + 'World';")) ; #4
529   * </pre>
530   * <p>
531   * corresponds to the HTML
532   * {@code <p><b>print </b>'Hello '<br>  + 'World';</p>}.</p>
533   *
534   * <p>
535   * It will produce the output:</p>
536   * <pre>
537   * {
538   *   sourceCode: "print 'Hello '\n  + 'World';",
539   *   //                     1          2
540   *   //           012345678901234 5678901234567
541   *   spans: [0, #1, 6, #2, 14, #3, 15, #4]
542   * }
543   * </pre>
544   * <p>
545   * where #1 is a reference to the {@code "print "} text node above, and so
546   * on for the other text nodes.
547   * </p>
548   *
549   * <p>
550   * The {@code} spans array is an array of pairs.  Even elements are the start
551   * indices of substrings, and odd elements are the text nodes (or BR elements)
552   * that contain the text for those substrings.
553   * Substrings continue until the next index or the end of the source.
554   * </p>
555   *
556   * @param {Node} node an HTML DOM subtree containing source-code.
557   * @param {boolean|number} isPreformatted truthy if white-space in
558   *    text nodes should be considered significant.
559   * @return {SourceSpansT} source code and the nodes in which they occur.
560   */
561  function extractSourceSpans(node, isPreformatted) {
562    var nocode = /(?:^|\s)nocode(?:\s|$)/;
563
564    var chunks = [];
565    var length = 0;
566    var spans = [];
567    var k = 0;
568
569    function walk(node) {
570      var type = node.nodeType;
571      if (type == 1) {  // Element
572        if (nocode.test(node.className)) { return; }
573        for (var child = node.firstChild; child; child = child.nextSibling) {
574          walk(child);
575        }
576        var nodeName = node.nodeName.toLowerCase();
577        if ('br' === nodeName || 'li' === nodeName) {
578          chunks[k] = '\n';
579          spans[k << 1] = length++;
580          spans[(k++ << 1) | 1] = node;
581        }
582      } else if (type == 3 || type == 4) {  // Text
583        var text = node.nodeValue;
584        if (text.length) {
585          if (!isPreformatted) {
586            text = text.replace(/[ \t\r\n]+/g, ' ');
587          } else {
588            text = text.replace(/\r\n?/g, '\n');  // Normalize newlines.
589          }
590          // TODO: handle tabs here?
591          chunks[k] = text;
592          spans[k << 1] = length;
593          length += text.length;
594          spans[(k++ << 1) | 1] = node;
595        }
596      }
597    }
598
599    walk(node);
600
601    return {
602      sourceCode: chunks.join('').replace(/\n$/, ''),
603      spans: spans
604    };
605  }
606
607
608  /**
609   * Apply the given language handler to sourceCode and add the resulting
610   * decorations to out.
611   * @param {!Element} sourceNode
612   * @param {number} basePos the index of sourceCode within the chunk of source
613   *    whose decorations are already present on out.
614   * @param {string} sourceCode
615   * @param {function(JobT)} langHandler
616   * @param {DecorationsT} out
617   */
618  function appendDecorations(
619      sourceNode, basePos, sourceCode, langHandler, out) {
620    if (!sourceCode) { return; }
621    /** @type {JobT} */
622    var job = {
623      sourceNode: sourceNode,
624      pre: 1,
625      langExtension: null,
626      numberLines: null,
627      sourceCode: sourceCode,
628      spans: null,
629      basePos: basePos,
630      decorations: null
631    };
632    langHandler(job);
633    out.push.apply(out, job.decorations);
634  }
635
636  var notWs = /\S/;
637
638  /**
639   * Given an element, if it contains only one child element and any text nodes
640   * it contains contain only space characters, return the sole child element.
641   * Otherwise returns undefined.
642   * <p>
643   * This is meant to return the CODE element in {@code <pre><code ...>} when
644   * there is a single child element that contains all the non-space textual
645   * content, but not to return anything where there are multiple child elements
646   * as in {@code <pre><code>...</code><code>...</code></pre>} or when there
647   * is textual content.
648   */
649  function childContentWrapper(element) {
650    var wrapper = undefined;
651    for (var c = element.firstChild; c; c = c.nextSibling) {
652      var type = c.nodeType;
653      wrapper = (type === 1)  // Element Node
654          ? (wrapper ? element : c)
655          : (type === 3)  // Text Node
656          ? (notWs.test(c.nodeValue) ? element : wrapper)
657          : wrapper;
658    }
659    return wrapper === element ? undefined : wrapper;
660  }
661
662  /** Given triples of [style, pattern, context] returns a lexing function,
663    * The lexing function interprets the patterns to find token boundaries and
664    * returns a decoration list of the form
665    * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
666    * where index_n is an index into the sourceCode, and style_n is a style
667    * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
668    * all characters in sourceCode[index_n-1:index_n].
669    *
670    * The stylePatterns is a list whose elements have the form
671    * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
672    *
673    * Style is a style constant like PR_PLAIN, or can be a string of the
674    * form 'lang-FOO', where FOO is a language extension describing the
675    * language of the portion of the token in $1 after pattern executes.
676    * E.g., if style is 'lang-lisp', and group 1 contains the text
677    * '(hello (world))', then that portion of the token will be passed to the
678    * registered lisp handler for formatting.
679    * The text before and after group 1 will be restyled using this decorator
680    * so decorators should take care that this doesn't result in infinite
681    * recursion.  For example, the HTML lexer rule for SCRIPT elements looks
682    * something like ['lang-js', /<[s]cript>(.+?)<\/script>/].  This may match
683    * '<script>foo()<\/script>', which would cause the current decorator to
684    * be called with '<script>' which would not match the same rule since
685    * group 1 must not be empty, so it would be instead styled as PR_TAG by
686    * the generic tag rule.  The handler registered for the 'js' extension would
687    * then be called with 'foo()', and finally, the current decorator would
688    * be called with '<\/script>' which would not match the original rule and
689    * so the generic tag rule would identify it as a tag.
690    *
691    * Pattern must only match prefixes, and if it matches a prefix, then that
692    * match is considered a token with the same style.
693    *
694    * Context is applied to the last non-whitespace, non-comment token
695    * recognized.
696    *
697    * Shortcut is an optional string of characters, any of which, if the first
698    * character, gurantee that this pattern and only this pattern matches.
699    *
700    * @param {Array} shortcutStylePatterns patterns that always start with
701    *   a known character.  Must have a shortcut string.
702    * @param {Array} fallthroughStylePatterns patterns that will be tried in
703    *   order if the shortcut ones fail.  May have shortcuts.
704    *
705    * @return {function (JobT)} a function that takes an undecorated job and
706    *   attaches a list of decorations.
707    */
708  function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
709    var shortcuts = {};
710    var tokenizer;
711    (function () {
712      var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns);
713      var allRegexs = [];
714      var regexKeys = {};
715      for (var i = 0, n = allPatterns.length; i < n; ++i) {
716        var patternParts = allPatterns[i];
717        var shortcutChars = patternParts[3];
718        if (shortcutChars) {
719          for (var c = shortcutChars.length; --c >= 0;) {
720            shortcuts[shortcutChars.charAt(c)] = patternParts;
721          }
722        }
723        var regex = patternParts[1];
724        var k = '' + regex;
725        if (!regexKeys.hasOwnProperty(k)) {
726          allRegexs.push(regex);
727          regexKeys[k] = null;
728        }
729      }
730      allRegexs.push(/[\0-\uffff]/);
731      tokenizer = combinePrefixPatterns(allRegexs);
732    })();
733
734    var nPatterns = fallthroughStylePatterns.length;
735
736    /**
737     * Lexes job.sourceCode and attaches an output array job.decorations of
738     * style classes preceded by the position at which they start in
739     * job.sourceCode in order.
740     *
741     * @type{function (JobT)}
742     */
743    var decorate = function (job) {
744      var sourceCode = job.sourceCode, basePos = job.basePos;
745      var sourceNode = job.sourceNode;
746      /** Even entries are positions in source in ascending order.  Odd enties
747        * are style markers (e.g., PR_COMMENT) that run from that position until
748        * the end.
749        * @type {DecorationsT}
750        */
751      var decorations = [basePos, PR_PLAIN];
752      var pos = 0;  // index into sourceCode
753      var tokens = sourceCode.match(tokenizer) || [];
754      var styleCache = {};
755
756      for (var ti = 0, nTokens = tokens.length; ti < nTokens; ++ti) {
757        var token = tokens[ti];
758        var style = styleCache[token];
759        var match = void 0;
760
761        var isEmbedded;
762        if (typeof style === 'string') {
763          isEmbedded = false;
764        } else {
765          var patternParts = shortcuts[token.charAt(0)];
766          if (patternParts) {
767            match = token.match(patternParts[1]);
768            style = patternParts[0];
769          } else {
770            for (var i = 0; i < nPatterns; ++i) {
771              patternParts = fallthroughStylePatterns[i];
772              match = token.match(patternParts[1]);
773              if (match) {
774                style = patternParts[0];
775                break;
776              }
777            }
778
779            if (!match) {  // make sure that we make progress
780              style = PR_PLAIN;
781            }
782          }
783
784          isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5);
785          if (isEmbedded && !(match && typeof match[1] === 'string')) {
786            isEmbedded = false;
787            style = PR_SOURCE;
788          }
789
790          if (!isEmbedded) { styleCache[token] = style; }
791        }
792
793        var tokenStart = pos;
794        pos += token.length;
795
796        if (!isEmbedded) {
797          decorations.push(basePos + tokenStart, style);
798        } else {  // Treat group 1 as an embedded block of source code.
799          var embeddedSource = match[1];
800          var embeddedSourceStart = token.indexOf(embeddedSource);
801          var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
802          if (match[2]) {
803            // If embeddedSource can be blank, then it would match at the
804            // beginning which would cause us to infinitely recurse on the
805            // entire token, so we catch the right context in match[2].
806            embeddedSourceEnd = token.length - match[2].length;
807            embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
808          }
809          var lang = style.substring(5);
810          // Decorate the left of the embedded source
811          appendDecorations(
812              sourceNode,
813              basePos + tokenStart,
814              token.substring(0, embeddedSourceStart),
815              decorate, decorations);
816          // Decorate the embedded source
817          appendDecorations(
818              sourceNode,
819              basePos + tokenStart + embeddedSourceStart,
820              embeddedSource,
821              langHandlerForExtension(lang, embeddedSource),
822              decorations);
823          // Decorate the right of the embedded section
824          appendDecorations(
825              sourceNode,
826              basePos + tokenStart + embeddedSourceEnd,
827              token.substring(embeddedSourceEnd),
828              decorate, decorations);
829        }
830      }
831      job.decorations = decorations;
832    };
833    return decorate;
834  }
835
836  /** returns a function that produces a list of decorations from source text.
837    *
838    * This code treats ", ', and ` as string delimiters, and \ as a string
839    * escape.  It does not recognize perl's qq() style strings.
840    * It has no special handling for double delimiter escapes as in basic, or
841    * the tripled delimiters used in python, but should work on those regardless
842    * although in those cases a single string literal may be broken up into
843    * multiple adjacent string literals.
844    *
845    * It recognizes C, C++, and shell style comments.
846    *
847    * @param {Object} options a set of optional parameters.
848    * @return {function (JobT)} a function that examines the source code
849    *     in the input job and builds a decoration list which it attaches to
850    *     the job.
851    */
852  function sourceDecorator(options) {
853    var shortcutStylePatterns = [], fallthroughStylePatterns = [];
854    if (options['tripleQuotedStrings']) {
855      // '''multi-line-string''', 'single-line-string', and double-quoted
856      shortcutStylePatterns.push(
857          [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
858           null, '\'"']);
859    } else if (options['multiLineStrings']) {
860      // 'multi-line-string', "multi-line-string"
861      shortcutStylePatterns.push(
862          [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
863           null, '\'"`']);
864    } else {
865      // 'single-line-string', "single-line-string"
866      shortcutStylePatterns.push(
867          [PR_STRING,
868           /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
869           null, '"\'']);
870    }
871    if (options['verbatimStrings']) {
872      // verbatim-string-literal production from the C# grammar.  See issue 93.
873      fallthroughStylePatterns.push(
874          [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
875    }
876    var hc = options['hashComments'];
877    if (hc) {
878      if (options['cStyleComments']) {
879        if (hc > 1) {  // multiline hash comments
880          shortcutStylePatterns.push(
881              [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']);
882        } else {
883          // Stop C preprocessor declarations at an unclosed open comment
884          shortcutStylePatterns.push(
885              [PR_COMMENT, /^#(?:(?:define|e(?:l|nd)if|else|error|ifn?def|include|line|pragma|undef|warning)\b|[^\r\n]*)/,
886               null, '#']);
887        }
888        // #include <stdio.h>
889        fallthroughStylePatterns.push(
890            [PR_STRING,
891             /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h(?:h|pp|\+\+)?|[a-z]\w*)>/,
892             null]);
893      } else {
894        shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
895      }
896    }
897    if (options['cStyleComments']) {
898      fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
899      fallthroughStylePatterns.push(
900          [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
901    }
902    var regexLiterals = options['regexLiterals'];
903    if (regexLiterals) {
904      /**
905       * @const
906       */
907      var regexExcls = regexLiterals > 1
908        ? ''  // Multiline regex literals
909        : '\n\r';
910      /**
911       * @const
912       */
913      var regexAny = regexExcls ? '.' : '[\\S\\s]';
914      /**
915       * @const
916       */
917      var REGEX_LITERAL = (
918          // A regular expression literal starts with a slash that is
919          // not followed by * or / so that it is not confused with
920          // comments.
921          '/(?=[^/*' + regexExcls + '])'
922          // and then contains any number of raw characters,
923          + '(?:[^/\\x5B\\x5C' + regexExcls + ']'
924          // escape sequences (\x5C),
925          +    '|\\x5C' + regexAny
926          // or non-nesting character sets (\x5B\x5D);
927          +    '|\\x5B(?:[^\\x5C\\x5D' + regexExcls + ']'
928          +             '|\\x5C' + regexAny + ')*(?:\\x5D|$))+'
929          // finally closed by a /.
930          + '/');
931      fallthroughStylePatterns.push(
932          ['lang-regex',
933           RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
934           ]);
935    }
936
937    var types = options['types'];
938    if (types) {
939      fallthroughStylePatterns.push([PR_TYPE, types]);
940    }
941
942    var keywords = ("" + options['keywords']).replace(/^ | $/g, '');
943    if (keywords.length) {
944      fallthroughStylePatterns.push(
945          [PR_KEYWORD,
946           new RegExp('^(?:' + keywords.replace(/[\s,]+/g, '|') + ')\\b'),
947           null]);
948    }
949
950    shortcutStylePatterns.push([PR_PLAIN,       /^\s+/, null, ' \r\n\t\xA0']);
951
952    var punctuation =
953      // The Bash man page says
954
955      // A word is a sequence of characters considered as a single
956      // unit by GRUB. Words are separated by metacharacters,
957      // which are the following plus space, tab, and newline: { }
958      // | & $ ; < >
959      // ...
960
961      // A word beginning with # causes that word and all remaining
962      // characters on that line to be ignored.
963
964      // which means that only a '#' after /(?:^|[{}|&$;<>\s])/ starts a
965      // comment but empirically
966      // $ echo {#}
967      // {#}
968      // $ echo \$#
969      // $#
970      // $ echo }#
971      // }#
972
973      // so /(?:^|[|&;<>\s])/ is more appropriate.
974
975      // http://gcc.gnu.org/onlinedocs/gcc-2.95.3/cpp_1.html#SEC3
976      // suggests that this definition is compatible with a
977      // default mode that tries to use a single token definition
978      // to recognize both bash/python style comments and C
979      // preprocessor directives.
980
981      // This definition of punctuation does not include # in the list of
982      // follow-on exclusions, so # will not be broken before if preceeded
983      // by a punctuation character.  We could try to exclude # after
984      // [|&;<>] but that doesn't seem to cause many major problems.
985      // If that does turn out to be a problem, we should change the below
986      // when hc is truthy to include # in the run of punctuation characters
987      // only when not followint [|&;<>].
988      '^.[^\\s\\w.$@\'"`/\\\\]*';
989    if (options['regexLiterals']) {
990      punctuation += '(?!\s*\/)';
991    }
992
993    fallthroughStylePatterns.push(
994        // TODO(mikesamuel): recognize non-latin letters and numerals in idents
995        [PR_LITERAL,     /^@[a-z_$][a-z_$@0-9]*/i, null],
996        [PR_TYPE,        /^(?:[@_]?[A-Z]+[a-z][A-Za-z_$@0-9]*|\w+_t\b)/, null],
997        [PR_PLAIN,       /^[a-z_$][a-z_$@0-9]*/i, null],
998        [PR_LITERAL,
999         new RegExp(
1000             '^(?:'
1001             // A hex number
1002             + '0x[a-f0-9]+'
1003             // or an octal or decimal number,
1004             + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d\\+)'
1005             // possibly in scientific notation
1006             + '(?:e[+\\-]?\\d+)?'
1007             + ')'
1008             // with an optional modifier like UL for unsigned long
1009             + '[a-z]*', 'i'),
1010         null, '0123456789'],
1011        // Don't treat escaped quotes in bash as starting strings.
1012        // See issue 144.
1013        [PR_PLAIN,       /^\\[\s\S]?/, null],
1014        [PR_PUNCTUATION, new RegExp(punctuation), null]);
1015
1016    return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
1017  }
1018
1019  var decorateSource = sourceDecorator({
1020        'keywords': ALL_KEYWORDS,
1021        'hashComments': true,
1022        'cStyleComments': true,
1023        'multiLineStrings': true,
1024        'regexLiterals': true
1025      });
1026
  /**
   * Given a DOM subtree, wraps it in a list, and puts each line into its own
   * list item.
   *
   * The content of {@code node} is first moved into a single detached LI.
   * That LI is then repeatedly split at every BR element and (when
   * {@code isPreformatted}) at every line break inside a text node, cloning
   * the chain of open ancestor elements so that styling which spans a line
   * break is present on both resulting lines.  Finally all LIs are appended
   * to a new {@code <ol class="linenums">} which becomes the only child of
   * {@code node}.
   *
   * @param {Node} node modified in place.  Its content is pulled into an
   *     HTMLOListElement, and each line is moved into a separate list item.
   *     This requires cloning elements, so the input might not have unique
   *     IDs after numbering.
   * @param {number|null|boolean} startLineNum
   *     If truthy, coerced to an integer which is the 1-indexed line number
   *     of the first line of code.  The number of the first line will be
   *     attached to the list.
   * @param {boolean} isPreformatted true iff white-space in text nodes should
   *     be treated as significant.
   */
  function numberLines(node, startLineNum, isPreformatted) {
    // Elements whose class list contains "nocode" are not descended into.
    var nocode = /(?:^|\s)nocode(?:\s|$)/;
    // CRLF, lone CR, or LF.
    var lineBreak = /\r\n?|\n/;

    var document = node.ownerDocument;

    // Move every child of node into one detached LI.
    var li = document.createElement('li');
    while (node.firstChild) {
      li.appendChild(node.firstChild);
    }
    // An array of lines.  We split below, so this is initialized to one
    // un-split line.
    var listItems = [li];

    // Depth-first search for the first line break under the given node.
    // Everything after a break is moved by breakAfter into a new list item
    // that is appended to listItems and walked later by the driver loop.
    function walk(node) {
      var type = node.nodeType;
      if (type == 1 && !nocode.test(node.className)) {  // Element
        if ('br' === node.nodeName.toLowerCase()) {
          breakAfter(node);
          // Discard the <BR> since it is now flush against a </LI>.
          if (node.parentNode) {
            node.parentNode.removeChild(node);
          }
        } else {
          for (var child = node.firstChild; child; child = child.nextSibling) {
            walk(child);
          }
        }
      } else if ((type == 3 || type == 4) && isPreformatted) {  // Text
        var text = node.nodeValue;
        var match = text.match(lineBreak);
        if (match) {
          // Keep the text before the first break in this node ...
          var firstLine = text.substring(0, match.index);
          node.nodeValue = firstLine;
          // ... and put the text after it in a new sibling, which
          // breakAfter then moves onto the next line.
          var tail = text.substring(match.index + match[0].length);
          if (tail) {
            var parent = node.parentNode;
            parent.insertBefore(
              document.createTextNode(tail), node.nextSibling);
          }
          breakAfter(node);
          if (!firstLine) {
            // Don't leave blank text nodes in the DOM.
            node.parentNode.removeChild(node);
          }
        }
      }
    }

    // Split a line after the given node.
    function breakAfter(lineEndNode) {
      // If there's nothing to the right, then we can skip ending the line
      // here, and move root-wards since splitting just before an end-tag
      // would require us to create a bunch of empty copies.
      while (!lineEndNode.nextSibling) {
        lineEndNode = lineEndNode.parentNode;
        if (!lineEndNode) { return; }
      }

      // Recursively builds the right-hand side of the split: limit (or a
      // shallow clone of it when copy is truthy) followed by every sibling
      // to its right, re-parented under a clone of the ancestor chain.
      function breakLeftOf(limit, copy) {
        // Clone shallowly if this node needs to be on both sides of the break.
        var rightSide = copy ? limit.cloneNode(false) : limit;
        var parent = limit.parentNode;
        if (parent) {
          // We clone the parent chain.
          // This helps us resurrect important styling elements that cross lines.
          // E.g. in <i>Foo<br>Bar</i>
          // should be rewritten to <li><i>Foo</i></li><li><i>Bar</i></li>.
          var parentClone = breakLeftOf(parent, 1);
          // Move the clone and everything to the right of the original
          // onto the cloned parent.
          var next = limit.nextSibling;
          parentClone.appendChild(rightSide);
          // next is captured before each move because appendChild changes
          // the moved node's nextSibling.
          for (var sibling = next; sibling; sibling = next) {
            next = sibling.nextSibling;
            parentClone.appendChild(sibling);
          }
        }
        return rightSide;
      }

      var copiedListItem = breakLeftOf(lineEndNode.nextSibling, 0);

      // Walk the parent chain until we reach an unattached LI.
      for (var parent;
           // Check nodeType since IE invents document fragments.
           (parent = copiedListItem.parentNode) && parent.nodeType === 1;) {
        copiedListItem = parent;
      }
      // Put it on the list of lines for later processing.
      listItems.push(copiedListItem);
    }

    // Split lines while there are lines left to split.
    for (var i = 0;  // Number of lines that have been split so far.
         i < listItems.length;  // length updated by breakAfter calls.
         ++i) {
      walk(listItems[i]);
    }

    // Make sure numeric indices show correctly.
    // Only true when startLineNum is a number equal to its own 32-bit
    // integer value, so booleans and null never set the attribute.
    if (startLineNum === (startLineNum|0)) {
      listItems[0].setAttribute('value', startLineNum);
    }

    var ol = document.createElement('ol');
    ol.className = 'linenums';
    // Zero-based index of the first line, clamped to be non-negative; the
    // trailing "|| 0" turns a NaN result into 0.
    var offset = Math.max(0, ((startLineNum - 1 /* zero index */)) | 0) || 0;
    for (var i = 0, n = listItems.length; i < n; ++i) {
      li = listItems[i];
      // Stick a class on the LIs so that stylesheets can
      // color odd/even rows, or any other row pattern that
      // is co-prime with 10.
      li.className = 'L' + ((i + offset) % 10);
      // Give empty lines a non-breaking space as content.
      if (!li.firstChild) {
        li.appendChild(document.createTextNode('\xA0'));
      }
      ol.appendChild(li);
    }

    node.appendChild(ol);
  }
1164
1165
1166  /**
1167   * Breaks {@code job.sourceCode} around style boundaries in
1168   * {@code job.decorations} and modifies {@code job.sourceNode} in place.
1169   * @param {JobT} job
1170   * @private
1171   */
1172  function recombineTagsAndDecorations(job) {
1173    var isIE8OrEarlier = /\bMSIE\s(\d+)/.exec(navigator.userAgent);
1174    isIE8OrEarlier = isIE8OrEarlier && +isIE8OrEarlier[1] <= 8;
1175    var newlineRe = /\n/g;
1176
1177    var source = job.sourceCode;
1178    var sourceLength = source.length;
1179    // Index into source after the last code-unit recombined.
1180    var sourceIndex = 0;
1181
1182    var spans = job.spans;
1183    var nSpans = spans.length;
1184    // Index into spans after the last span which ends at or before sourceIndex.
1185    var spanIndex = 0;
1186
1187    var decorations = job.decorations;
1188    var nDecorations = decorations.length;
1189    // Index into decorations after the last decoration which ends at or before
1190    // sourceIndex.
1191    var decorationIndex = 0;
1192
1193    // Remove all zero-length decorations.
1194    decorations[nDecorations] = sourceLength;
1195    var decPos, i;
1196    for (i = decPos = 0; i < nDecorations;) {
1197      if (decorations[i] !== decorations[i + 2]) {
1198        decorations[decPos++] = decorations[i++];
1199        decorations[decPos++] = decorations[i++];
1200      } else {
1201        i += 2;
1202      }
1203    }
1204    nDecorations = decPos;
1205
1206    // Simplify decorations.
1207    for (i = decPos = 0; i < nDecorations;) {
1208      var startPos = decorations[i];
1209      // Conflate all adjacent decorations that use the same style.
1210      var startDec = decorations[i + 1];
1211      var end = i + 2;
1212      while (end + 2 <= nDecorations && decorations[end + 1] === startDec) {
1213        end += 2;
1214      }
1215      decorations[decPos++] = startPos;
1216      decorations[decPos++] = startDec;
1217      i = end;
1218    }
1219
1220    nDecorations = decorations.length = decPos;
1221
1222    var sourceNode = job.sourceNode;
1223    var oldDisplay = "";
1224    if (sourceNode) {
1225      oldDisplay = sourceNode.style.display;
1226      sourceNode.style.display = 'none';
1227    }
1228    try {
1229      var decoration = null;
1230      while (spanIndex < nSpans) {
1231        var spanStart = spans[spanIndex];
1232        var spanEnd = /** @type{number} */ (spans[spanIndex + 2])
1233            || sourceLength;
1234
1235        var decEnd = decorations[decorationIndex + 2] || sourceLength;
1236
1237        var end = Math.min(spanEnd, decEnd);
1238
1239        var textNode = /** @type{Node} */ (spans[spanIndex + 1]);
1240        var styledText;
1241        if (textNode.nodeType !== 1  // Don't muck with <BR>s or <LI>s
1242            // Don't introduce spans around empty text nodes.
1243            && (styledText = source.substring(sourceIndex, end))) {
1244          // This may seem bizarre, and it is.  Emitting LF on IE causes the
1245          // code to display with spaces instead of line breaks.
1246          // Emitting Windows standard issue linebreaks (CRLF) causes a blank
1247          // space to appear at the beginning of every line but the first.
1248          // Emitting an old Mac OS 9 line separator makes everything spiffy.
1249          if (isIE8OrEarlier) {
1250            styledText = styledText.replace(newlineRe, '\r');
1251          }
1252          textNode.nodeValue = styledText;
1253          var document = textNode.ownerDocument;
1254          var span = document.createElement('span');
1255          span.className = decorations[decorationIndex + 1];
1256          var parentNode = textNode.parentNode;
1257          parentNode.replaceChild(span, textNode);
1258          span.appendChild(textNode);
1259          if (sourceIndex < spanEnd) {  // Split off a text node.
1260            spans[spanIndex + 1] = textNode
1261                // TODO: Possibly optimize by using '' if there's no flicker.
1262                = document.createTextNode(source.substring(end, spanEnd));
1263            parentNode.insertBefore(textNode, span.nextSibling);
1264          }
1265        }
1266
1267        sourceIndex = end;
1268
1269        if (sourceIndex >= spanEnd) {
1270          spanIndex += 2;
1271        }
1272        if (sourceIndex >= decEnd) {
1273          decorationIndex += 2;
1274        }
1275      }
1276    } finally {
1277      if (sourceNode) {
1278        sourceNode.style.display = oldDisplay;
1279      }
1280    }
1281  }
1282
1283
1284  /** Maps language-specific file extensions to handlers. */
1285  var langHandlerRegistry = {};
1286  /** Register a language handler for the given file extensions.
1287    * @param {function (JobT)} handler a function from source code to a list
1288    *      of decorations.  Takes a single argument job which describes the
1289    *      state of the computation and attaches the decorations to it.
1290    * @param {Array.<string>} fileExtensions
1291    */
1292  function registerLangHandler(handler, fileExtensions) {
1293    for (var i = fileExtensions.length; --i >= 0;) {
1294      var ext = fileExtensions[i];
1295      if (!langHandlerRegistry.hasOwnProperty(ext)) {
1296        langHandlerRegistry[ext] = handler;
1297      } else if (win['console']) {
1298        console['warn']('cannot override language handler %s', ext);
1299      }
1300    }
1301  }
1302  function langHandlerForExtension(extension, source) {
1303    if (!(extension && langHandlerRegistry.hasOwnProperty(extension))) {
1304      // Treat it as markup if the first non whitespace character is a < and
1305      // the last non-whitespace character is a >.
1306      extension = /^\s*</.test(source)
1307          ? 'default-markup'
1308          : 'default-code';
1309    }
1310    return langHandlerRegistry[extension];
1311  }
  // 'default-code' is the key langHandlerForExtension falls back to when no
  // handler is registered for the requested extension and the source does
  // not start with '<'.
  registerLangHandler(decorateSource, ['default-code']);
  // Markup lexer.  'default-markup' is the fallback key for sources that do
  // start with '<'.  A style of the form 'lang-XYZ' marks capture group 1 as
  // embedded source to be decorated by the handler looked up for 'XYZ'; a
  // bare 'lang-' gives an empty name, so the handler is picked by the
  // fallback rules.  Patterns are tried in the order listed.
  registerLangHandler(
      createSimpleLexer(
          [],
          [
           [PR_PLAIN,       /^[^<?]+/],
           [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/],
           [PR_COMMENT,     /^<\!--[\s\S]*?(?:-\->|$)/],
           // Unescaped content in an unknown language
           ['lang-',        /^<\?([\s\S]+?)(?:\?>|$)/],
           ['lang-',        /^<%([\s\S]+?)(?:%>|$)/],
           [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/],
           ['lang-',        /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i],
           // Unescaped content in javascript.  (Or possibly vbscript).
           // Group 1 may be empty here, so group 2 captures the closing tag
           // and the embedded source is located relative to it.
           ['lang-js',      /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i],
           // Contains unescaped stylesheet content
           ['lang-css',     /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i],
           // Any other tag is handed as a whole to the 'in.tag' handler.
           ['lang-in.tag',  /^(<\/?[a-z][^<>]*>)/i]
          ]),
      ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);
  // Lexer for the inside of a single tag: tag name, attribute names and
  // values, with on* and style attribute values delegated to the js and css
  // handlers.
  registerLangHandler(
      createSimpleLexer(
          [
           [PR_PLAIN,        /^[\s]+/, null, ' \t\r\n'],
           [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'']
           ],
          [
           [PR_TAG,          /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],
           // Names starting with "on", or "style" followed by whitespace or
           // '=', are excluded here so the lang-js / lang-css patterns below
           // can claim them.
           [PR_ATTRIB_NAME,  /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
           // Unquoted attribute value after '='.
           ['lang-uq.val',   /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
           [PR_PUNCTUATION,  /^[=<>\/]+/],
           // Event handler attributes: double-quoted, single-quoted, unquoted.
           ['lang-js',       /^on\w+\s*=\s*\"([^\"]+)\"/i],
           ['lang-js',       /^on\w+\s*=\s*\'([^\']+)\'/i],
           ['lang-js',       /^on\w+\s*=\s*([^\"\'>\s]+)/i],
           // Inline style attributes: double-quoted, single-quoted, unquoted.
           ['lang-css',      /^style\s*=\s*\"([^\"]+)\"/i],
           ['lang-css',      /^style\s*=\s*\'([^\']+)\'/i],
           ['lang-css',      /^style\s*=\s*([^\"\'>\s]+)/i]
           ]),
      ['in.tag']);
  // An unquoted attribute value is styled in one piece as an attribute value.
  registerLangHandler(
      createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']);
1353  registerLangHandler(sourceDecorator({
1354          'keywords': CPP_KEYWORDS,
1355          'hashComments': true,
1356          'cStyleComments': true,
1357          'types': C_TYPES
1358        }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);
1359  registerLangHandler(sourceDecorator({
1360          'keywords': 'null,true,false'
1361        }), ['json']);
1362  registerLangHandler(sourceDecorator({
1363          'keywords': CSHARP_KEYWORDS,
1364          'hashComments': true,
1365          'cStyleComments': true,
1366          'verbatimStrings': true,
1367          'types': C_TYPES
1368        }), ['cs']);
1369  registerLangHandler(sourceDecorator({
1370          'keywords': JAVA_KEYWORDS,
1371          'cStyleComments': true
1372        }), ['java']);
1373  registerLangHandler(sourceDecorator({
1374          'keywords': SH_KEYWORDS,
1375          'hashComments': true,
1376          'multiLineStrings': true
1377        }), ['bash', 'bsh', 'csh', 'sh']);
1378  registerLangHandler(sourceDecorator({
1379          'keywords': PYTHON_KEYWORDS,
1380          'hashComments': true,
1381          'multiLineStrings': true,
1382          'tripleQuotedStrings': true
1383        }), ['cv', 'py', 'python']);
1384  registerLangHandler(sourceDecorator({
1385          'keywords': PERL_KEYWORDS,
1386          'hashComments': true,
1387          'multiLineStrings': true,
1388          'regexLiterals': 2  // multiline regex literals
1389        }), ['perl', 'pl', 'pm']);
1390  registerLangHandler(sourceDecorator({
1391          'keywords': RUBY_KEYWORDS,
1392          'hashComments': true,
1393          'multiLineStrings': true,
1394          'regexLiterals': true
1395        }), ['rb', 'ruby']);
1396  registerLangHandler(sourceDecorator({
1397          'keywords': JSCRIPT_KEYWORDS,
1398          'cStyleComments': true,
1399          'regexLiterals': true
1400        }), ['javascript', 'js', 'ts', 'typescript']);
1401  registerLangHandler(sourceDecorator({
1402          'keywords': COFFEE_KEYWORDS,
1403          'hashComments': 3,  // ### style block comments
1404          'cStyleComments': true,
1405          'multilineStrings': true,
1406          'tripleQuotedStrings': true,
1407          'regexLiterals': true
1408        }), ['coffee']);
1409  registerLangHandler(
1410      createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
1411
1412  /** @param {JobT} job */
1413  function applyDecorator(job) {
1414    var opt_langExtension = job.langExtension;
1415
1416    try {
1417      // Extract tags, and convert the source code to plain text.
1418      var sourceAndSpans = extractSourceSpans(job.sourceNode, job.pre);
1419      /** Plain text. @type {string} */
1420      var source = sourceAndSpans.sourceCode;
1421      job.sourceCode = source;
1422      job.spans = sourceAndSpans.spans;
1423      job.basePos = 0;
1424
1425      // Apply the appropriate language handler
1426      langHandlerForExtension(opt_langExtension, source)(job);
1427
1428      // Integrate the decorations and tags back into the source code,
1429      // modifying the sourceNode in place.
1430      recombineTagsAndDecorations(job);
1431    } catch (e) {
1432      if (win['console']) {
1433        console['log'](e && e['stack'] || e);
1434      }
1435    }
1436  }
1437
1438  /**
1439   * Pretty print a chunk of code.
1440   * @param sourceCodeHtml {string} The HTML to pretty print.
1441   * @param opt_langExtension {string} The language name to use.
1442   *     Typically, a filename extension like 'cpp' or 'java'.
1443   * @param opt_numberLines {number|boolean} True to number lines,
1444   *     or the 1-indexed number of the first line in sourceCodeHtml.
1445   */
1446  function $prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {
1447    /** @type{number|boolean} */
1448    var nl = opt_numberLines || false;
1449    /** @type{string|null} */
1450    var langExtension = opt_langExtension || null;
1451    /** @type{!Element} */
1452    var container = document.createElement('div');
1453    // This could cause images to load and onload listeners to fire.
1454    // E.g. <img onerror="alert(1337)" src="nosuchimage.png">.
1455    // We assume that the inner HTML is from a trusted source.
1456    // The pre-tag is required for IE8 which strips newlines from innerHTML
1457    // when it is injected into a <pre> tag.
1458    // http://stackoverflow.com/questions/451486/pre-tag-loses-line-breaks-when-setting-innerhtml-in-ie
1459    // http://stackoverflow.com/questions/195363/inserting-a-newline-into-a-pre-tag-ie-javascript
1460    container.innerHTML = '<pre>' + sourceCodeHtml + '</pre>';
1461    container = /** @type{!Element} */(container.firstChild);
1462    if (nl) {
1463      numberLines(container, nl, true);
1464    }
1465
1466    /** @type{JobT} */
1467    var job = {
1468      langExtension: langExtension,
1469      numberLines: nl,
1470      sourceNode: container,
1471      pre: 1,
1472      sourceCode: null,
1473      basePos: null,
1474      spans: null,
1475      decorations: null
1476    };
1477    applyDecorator(job);
1478    return container.innerHTML;
1479  }
1480
1481   /**
1482    * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
1483    * {@code class=prettyprint} and prettify them.
1484    *
1485    * @param {Function} opt_whenDone called when prettifying is done.
1486    * @param {HTMLElement|HTMLDocument} opt_root an element or document
1487    *   containing all the elements to pretty print.
1488    *   Defaults to {@code document.body}.
1489    */
1490  function $prettyPrint(opt_whenDone, opt_root) {
1491    var root = opt_root || document.body;
1492    var doc = root.ownerDocument || document;
1493    function byTagName(tn) { return root.getElementsByTagName(tn); }
1494    // fetch a list of nodes to rewrite
1495    var codeSegments = [byTagName('pre'), byTagName('code'), byTagName('xmp')];
1496    var elements = [];
1497    for (var i = 0; i < codeSegments.length; ++i) {
1498      for (var j = 0, n = codeSegments[i].length; j < n; ++j) {
1499        elements.push(codeSegments[i][j]);
1500      }
1501    }
1502    codeSegments = null;
1503
1504    var clock = Date;
1505    if (!clock['now']) {
1506      clock = { 'now': function () { return +(new Date); } };
1507    }
1508
1509    // The loop is broken into a series of continuations to make sure that we
1510    // don't make the browser unresponsive when rewriting a large page.
1511    var k = 0;
1512
1513    var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/;
1514    var prettyPrintRe = /\bprettyprint\b/;
1515    var prettyPrintedRe = /\bprettyprinted\b/;
1516    var preformattedTagNameRe = /pre|xmp/i;
1517    var codeRe = /^code$/i;
1518    var preCodeXmpRe = /^(?:pre|code|xmp)$/i;
1519    var EMPTY = {};
1520
1521    function doWork() {
1522      var endTime = (win['PR_SHOULD_USE_CONTINUATION'] ?
1523                     clock['now']() + 250 /* ms */ :
1524                     Infinity);
1525      for (; k < elements.length && clock['now']() < endTime; k++) {
1526        var cs = elements[k];
1527
1528        // Look for a preceding comment like
1529        // <?prettify lang="..." linenums="..."?>
1530        var attrs = EMPTY;
1531        {
1532          for (var preceder = cs; (preceder = preceder.previousSibling);) {
1533            var nt = preceder.nodeType;
1534            // <?foo?> is parsed by HTML 5 to a comment node (8)
1535            // like <!--?foo?-->, but in XML is a processing instruction
1536            var value = (nt === 7 || nt === 8) && preceder.nodeValue;
1537            if (value
1538                ? !/^\??prettify\b/.test(value)
1539                : (nt !== 3 || /\S/.test(preceder.nodeValue))) {
1540              // Skip over white-space text nodes but not others.
1541              break;
1542            }
1543            if (value) {
1544              attrs = {};
1545              value.replace(
1546                  /\b(\w+)=([\w:.%+-]+)/g,
1547                function (_, name, value) { attrs[name] = value; });
1548              break;
1549            }
1550          }
1551        }
1552
1553        var className = cs.className;
1554        if ((attrs !== EMPTY || prettyPrintRe.test(className))
1555            // Don't redo this if we've already done it.
1556            // This allows recalling pretty print to just prettyprint elements
1557            // that have been added to the page since last call.
1558            && !prettyPrintedRe.test(className)) {
1559
1560          // make sure this is not nested in an already prettified element
1561          var nested = false;
1562          for (var p = cs.parentNode; p; p = p.parentNode) {
1563            var tn = p.tagName;
1564            if (preCodeXmpRe.test(tn)
1565                && p.className && prettyPrintRe.test(p.className)) {
1566              nested = true;
1567              break;
1568            }
1569          }
1570          if (!nested) {
1571            // Mark done.  If we fail to prettyprint for whatever reason,
1572            // we shouldn't try again.
1573            cs.className += ' prettyprinted';
1574
            // If the class names include a language extension, use it.
1576            // Language extensions can be specified like
1577            //     <pre class="prettyprint lang-cpp">
1578            // the language extension "cpp" is used to find a language handler
1579            // as passed to PR.registerLangHandler.
1580            // HTML5 recommends that a language be specified using "language-"
1581            // as the prefix instead.  Google Code Prettify supports both.
1582            // http://dev.w3.org/html5/spec-author-view/the-code-element.html
1583            var langExtension = attrs['lang'];
1584            if (!langExtension) {
1585              langExtension = className.match(langExtensionRe);
1586              // Support <pre class="prettyprint"><code class="language-c">
1587              var wrapper;
1588              if (!langExtension && (wrapper = childContentWrapper(cs))
1589                  && codeRe.test(wrapper.tagName)) {
1590                langExtension = wrapper.className.match(langExtensionRe);
1591              }
1592
1593              if (langExtension) { langExtension = langExtension[1]; }
1594            }
1595
1596            var preformatted;
1597            if (preformattedTagNameRe.test(cs.tagName)) {
1598              preformatted = 1;
1599            } else {
1600              var currentStyle = cs['currentStyle'];
1601              var defaultView = doc.defaultView;
1602              var whitespace = (
1603                  currentStyle
1604                  ? currentStyle['whiteSpace']
1605                  : (defaultView
1606                     && defaultView.getComputedStyle)
1607                  ? defaultView.getComputedStyle(cs, null)
1608                  .getPropertyValue('white-space')
1609                  : 0);
1610              preformatted = whitespace
1611                  && 'pre' === whitespace.substring(0, 3);
1612            }
1613
1614            // Look for a class like linenums or linenums:<n> where <n> is the
1615            // 1-indexed number of the first line.
1616            var lineNums = attrs['linenums'];
1617            if (!(lineNums = lineNums === 'true' || +lineNums)) {
1618              lineNums = className.match(/\blinenums\b(?::(\d+))?/);
1619              lineNums =
1620                lineNums
1621                ? lineNums[1] && lineNums[1].length
1622                  ? +lineNums[1] : true
1623                : false;
1624            }
1625            if (lineNums) { numberLines(cs, lineNums, preformatted); }
1626
1627            // do the pretty printing
1628            var prettyPrintingJob = {
1629              langExtension: langExtension,
1630              sourceNode: cs,
1631              numberLines: lineNums,
1632              pre: preformatted,
1633              sourceCode: null,
1634              basePos: null,
1635              spans: null,
1636              decorations: null
1637            };
1638            applyDecorator(prettyPrintingJob);
1639          }
1640        }
1641      }
1642      if (k < elements.length) {
1643        // finish up in a continuation
1644        win.setTimeout(doWork, 250);
1645      } else if ('function' === typeof opt_whenDone) {
1646        opt_whenDone();
1647      }
1648    }
1649
1650    doWork();
1651  }
1652
1653  /**
1654   * Contains functions for creating and registering new language handlers.
1655   * @type {Object}
1656   */
1657  var PR = win['PR'] = {
1658        'createSimpleLexer': createSimpleLexer,
1659        'registerLangHandler': registerLangHandler,
1660        'sourceDecorator': sourceDecorator,
1661        'PR_ATTRIB_NAME': PR_ATTRIB_NAME,
1662        'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,
1663        'PR_COMMENT': PR_COMMENT,
1664        'PR_DECLARATION': PR_DECLARATION,
1665        'PR_KEYWORD': PR_KEYWORD,
1666        'PR_LITERAL': PR_LITERAL,
1667        'PR_NOCODE': PR_NOCODE,
1668        'PR_PLAIN': PR_PLAIN,
1669        'PR_PUNCTUATION': PR_PUNCTUATION,
1670        'PR_SOURCE': PR_SOURCE,
1671        'PR_STRING': PR_STRING,
1672        'PR_TAG': PR_TAG,
1673        'PR_TYPE': PR_TYPE,
1674        'prettyPrintOne':
1675           IN_GLOBAL_SCOPE
1676             ? (win['prettyPrintOne'] = $prettyPrintOne)
1677             : (prettyPrintOne = $prettyPrintOne),
1678        'prettyPrint':
1679           IN_GLOBAL_SCOPE
1680             ? (win['prettyPrint'] = $prettyPrint)
1681             : (prettyPrint = $prettyPrint)
1682      };
1683
1684  // Make PR available via the Asynchronous Module Definition (AMD) API.
1685  // Per https://github.com/amdjs/amdjs-api/wiki/AMD:
1686  // The Asynchronous Module Definition (AMD) API specifies a
1687  // mechanism for defining modules such that the module and its
1688  // dependencies can be asynchronously loaded.
1689  // ...
1690  // To allow a clear indicator that a global define function (as
1691  // needed for script src browser loading) conforms to the AMD API,
1692  // any global define function SHOULD have a property called "amd"
1693  // whose value is an object. This helps avoid conflict with any
1694  // other existing JavaScript code that could have defined a define()
1695  // function that does not conform to the AMD API.
1696  var define = win['define'];
1697  if (typeof define === "function" && define['amd']) {
1698    define("google-code-prettify", [], function () {
1699      return PR;
1700    });
1701  }
1702})();
1703
1704
1705var path = require('path');
1706
1707module.exports = {
1708  prettyPrint: prettyPrint,
1709  prettyPrintOne: prettyPrintOne,
1710  // include paths for css preprocessor support
1711  includePaths: [
1712    __dirname,
1713    path.resolve(__dirname, '../styles')
1714  ]
1715};
1716