1/**
2 * @license
3 * Copyright (C) 2009 Google Inc.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 *    http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/**
19 * @fileoverview
20 * Registers a language handler for CSS.
21 *
22 *
23 * To use, include prettify.js and this file in your HTML page.
24 * Then put your code in an HTML tag like
25 *      <pre class="prettyprint lang-css"></pre>
26 *
27 *
28 * http://www.w3.org/TR/CSS21/grammar.html Section G2 defines the lexical
29 * grammar.  This scheme does not recognize keywords containing escapes.
30 *
31 * @author mikesamuel@gmail.com
32 */
33
34// This file is a call to a function defined in prettify.js which defines a
35// lexical scanner for CSS and maps tokens to styles.
36
37// The call to PR['registerLangHandler'] is quoted so that Closure Compiler
// will not rename the call, so that these language extensions can be
39// compiled/minified separately from one another.  Other symbols defined in
40// prettify.js are similarly quoted.
41
42// The call is structured thus:
43// PR['registerLangHandler'](
44//    PR['createSimpleLexer'](
45//        shortcutPatterns,
46//        fallThroughPatterns),
47//    [languageId0, ..., languageIdN])
48
// Language IDs
50// =============
51// The language IDs are typically the file extensions of source files for
52// that language so that users can syntax highlight arbitrary files based
53// on just the extension.  This is heuristic, but works pretty well in
54// practice.
55
56// Patterns
57// ========
58// Lexers are typically implemented as a set of regular expressions.
59// The SimpleLexer function takes regular expressions, styles, and some
60// pragma-info and produces a lexer.  A token description looks like
61//   [STYLE_NAME, /regular-expression/, pragmas]
62
63// Initially, simple lexer's inner loop looked like:
64
65//    while sourceCode is not empty:
66//      try each regular expression in order until one matches
67//      remove the matched portion from sourceCode
68
69// This was really slow for large files because some JS interpreters
70// do a buffer copy on the matched portion which is O(n*n)
71
72// The current loop now looks like
73
74//    1. use js-modules/combinePrefixPatterns.js to
75//       combine all regular expressions into one
//    2. use a single global regular expression match to extract all tokens
77//    3. for each token try regular expressions in order until one matches it
78//       and classify it using the associated style
79
80// This is a lot more efficient but it does mean that lookahead and lookbehind
81// can't be used across boundaries to classify tokens.
82
83// Sometimes we need lookahead and lookbehind and sometimes we want to handle
84// embedded language -- JavaScript or CSS embedded in HTML, or inline assembly
85// in C.
86
87// If a particular pattern has a numbered group, and its style pattern starts
88// with "lang-" as in
89//    ['lang-js', /<script>(.*?)<\/script>/]
90// then the token classification step breaks the token into pieces.
91// Group 1 is re-parsed using the language handler for "lang-js", and the
92// surrounding portions are reclassified using the current language handler.
// This mechanism gives us lookahead, lookbehind, and language embedding.
94
95// Shortcut Patterns
96// =================
97// A shortcut pattern is one that is tried before other patterns if the first
98// character in the token is in the string of characters.
99// This very effectively lets us make quick correct decisions for common token
100// types.
101
102// All other patterns are fall-through patterns.
103
104
105
106// The comments inline below refer to productions in the CSS specification's
107// lexical grammar.  See link above.
// Registers the main CSS lexer under the language id 'css'.
//
// Each pattern entry is [style, regex, pragma] (shortcut entries carry a
// fourth element: the string of characters that may start the token).
// Entries whose style starts with "lang-" hand capture group 1 to another
// registered handler: 'css-kw' for property names and 'css-str' for the body
// of an unquoted url(...).
PR['registerLangHandler'](
    PR['createSimpleLexer'](
        // Shortcut patterns.
        [
         // The space production <s>
         [PR['PR_PLAIN'],       /^[ \t\r\n\f]+/, null, ' \t\r\n\f']
        ],
        // Fall-through patterns.
        [
         // Quoted strings.  <string1> and <string2>
         [PR['PR_STRING'],
          /^\"(?:[^\n\r\f\\\"]|\\(?:\r\n?|\n|\f)|\\[\s\S])*\"/, null],
         [PR['PR_STRING'],
          /^\'(?:[^\n\r\f\\\']|\\(?:\r\n?|\n|\f)|\\[\s\S])*\'/, null],
         // An unquoted URL.  Group 1 (the text between the parentheses) is
         // re-lexed by the 'css-str' handler.
         ['lang-css-str', /^url\(([^\)\"\']+)\)/i],
         // A fixed set of keywords.  The lookahead keeps a keyword from
         // matching as a prefix of a longer identifier.
         [PR['PR_KEYWORD'],
          /^(?:url|rgb|\!important|@import|@page|@media|@charset|inherit)(?=[^\-\w]|$)/i,
          null],
         // A property name -- an identifier followed by a colon.  Group 1
         // (the identifier without the colon) is re-lexed by 'css-kw'.
         // A hex escape inside the identifier is a SINGLE backslash followed
         // by hex digits and an optional space, exactly as for the leading
         // character.  The (?![0-9a-f]) makes a continuation escape swallow
         // its whole run of hex digits so that, when no colon follows, the
         // regex cannot try every split of the run between "escape" and
         // "plain identifier character".
         ['lang-css-kw', /^(-?(?:[_a-z]|(?:\\[0-9a-f]+ ?))(?:[_a-z0-9\-]|(?:\\[0-9a-f]+(?![0-9a-f]) ?))*)\s*:/i],
         // A C style block comment.  The <comment> production.
         [PR['PR_COMMENT'], /^\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\//],
         // Escaping text spans
         [PR['PR_COMMENT'], /^(?:<!--|-->)/],
         // A number possibly containing a suffix.  The fractional form is
         // tried first so that "1.5em" is one token rather than "1" followed
         // by ".5em" (JS alternation is ordered, not longest-match).
         [PR['PR_LITERAL'], /^(?:\d*\.\d+|\d+)(?:%|[a-z]+)?/i],
         // A hex color
         [PR['PR_LITERAL'], /^#(?:[0-9a-f]{3}){1,2}\b/i],
         // An identifier.  Continuation escapes use a single backslash, the
         // same as a leading escape.
         [PR['PR_PLAIN'],
          /^-?(?:[_a-z]|(?:\\[\da-f]+ ?))(?:[_a-z\d\-]|(?:\\[\da-f]+ ?))*/i],
         // A run of punctuation
         [PR['PR_PUNCTUATION'], /^[^\s\w\'\"]+/]
        ]),
    ['css']);
143// Above we use embedded languages to highlight property names (identifiers
144// followed by a colon) differently from identifiers in values.
// Registers the 'css-kw' handler, which styles every identifier it sees as a
// keyword.
PR['registerLangHandler'](
    PR['createSimpleLexer']([],
        [
         // An identifier.  A hex escape is a SINGLE backslash followed by
         // hex digits and an optional space, both as the leading character
         // and as a continuation character.
         [PR['PR_KEYWORD'],
          /^-?(?:[_a-z]|(?:\\[\da-f]+ ?))(?:[_a-z\d\-]|(?:\\[\da-f]+ ?))*/i]
        ]),
    ['css-kw']);
152// The content of an unquoted URL literal like url(http://foo/img.png) should
153// be colored as string content.  This language handler is used above in the
154// URL production to do so.
(function () {
  // Registers the 'css-str' handler.  It has no shortcut patterns and a
  // single fall-through pattern: a run of characters containing no ')' and
  // no quote character, styled as string content.
  var noShortcuts = [];
  var urlBodyPatterns = [
    [PR['PR_STRING'], /^[^\)\"\']+/]
  ];
  var urlBodyLexer = PR['createSimpleLexer'](noShortcuts, urlBodyPatterns);
  PR['registerLangHandler'](urlBodyLexer, ['css-str']);
})();
161