1/*!
2 * Copyright (c) 2006 js-markdown-extra developers
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. The name of the author may not be used to endorse or promote products
14 *    derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
var MARKDOWN_VERSION = "1.0.1o";
var MARKDOWNEXTRA_VERSION = "1.2.5";

// Global default settings:

/** Suffix used to close empty elements. Change to ">" for HTML output. */
var MARKDOWN_EMPTY_ELEMENT_SUFFIX = " />";

/** Width of a tab, in spaces, used for code blocks. */
var MARKDOWN_TAB_WIDTH = 4;

/** Optional title attribute for footnote links and backlinks. */
var MARKDOWN_FN_LINK_TITLE     = "";
var MARKDOWN_FN_BACKLINK_TITLE = "";

/** Optional class attribute for footnote links and backlinks. */
var MARKDOWN_FN_LINK_CLASS     = "";
var MARKDOWN_FN_BACKLINK_CLASS = "";

/** Change to false to remove Markdown from posts and/or comments. */
var MARKDOWN_WP_POSTS    = true;
var MARKDOWN_WP_COMMENTS = true;

/**
 * Standard Function Interface: name of the parser constructor that
 * Markdown() instantiates. Declared with `var` like the other settings so
 * that it is not an implicit global (which throws in strict-mode code).
 */
var MARKDOWN_PARSER_CLASS = 'MarkdownExtra_Parser';
53
/**
 * Converts Markdown formatted text to HTML.
 *
 * The parser named by MARKDOWN_PARSER_CLASS is created and initialised on
 * the first call and cached on `Markdown.parser`; later calls reuse it.
 *
 * @param text Markdown text
 * @return HTML
 */
function Markdown(text) {
    // The cache lives on the function object itself. This is the same
    // object the previous `arguments.callee.parser` referred to, but it
    // also works in strict-mode code where `arguments.callee` throws.
    var parser = Markdown.parser;
    if('undefined' == typeof parser) {
        // NOTE(review): eval() resolves the constructor by name in the
        // current scope. MARKDOWN_PARSER_CLASS must only ever hold a
        // trusted identifier, never user-supplied data.
        parser = eval("new " + MARKDOWN_PARSER_CLASS + "()");
        parser.init();
        // Cache only after init() succeeded.
        Markdown.parser = parser;
    }
    // Transform text using parser.
    return parser.transform(text);
}
73
/**
 * Parser constructor. Sets the nesting limits, the list of escapable
 * characters, the three transformation pipelines ("gamuts") and the
 * emphasis token tables. Each gamut entry is a [methodName, priority]
 * pair.
 */
function Markdown_Parser() {

    /**
     * Builds one emphasis table for markers made of `count` repeated
     * characters (1 = em, 2 = strong, 3 = em+strong). Rows are
     * [currently open marker, regex source]: the '' row is the opening
     * pattern, the other two close a `*...` or `_...` run.
     */
    function emphasisRelist(count) {
        var starKey = '';
        var starTok = '';
        var underTok = '';
        for(var k = 0; k < count; k++) {
            starKey  += '*';
            starTok  += '\\*';
            underTok += '_';
        }
        return [
            ['' , '(?:(^|[^\\*])(' + starTok + ')(?=[^\\*])|(^|[^_])(' + underTok + ')(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'],
            [starKey, '((?:\\S|^)[^\\*])(' + starTok + ')(?!\\*)'],
            [underTok, '((?:\\S|^)[^_])(' + underTok + ')(?!_)']
        ];
    }

    // Nesting limits carried over from the PHP implementation.
    this.nested_brackets_depth = 6;
    this.nested_url_parenthesis_depth = 4;

    // Characters that may be backslash-escaped in the source text.
    this.escape_chars = "\\\\`*_{}[]()>#+-.!";

    // Whole-document pipeline.
    this.document_gamut = [
        ['stripLinkDefinitions', 20], // strip link definitions, store in hashes
        ['runBasicBlockGamut',   30]
    ];

    // Block-level pipeline: transformations that form block-level tags
    // like paragraphs, headers, and list items.
    this.block_gamut = [
        ['doHeaders',         10],
        ['doHorizontalRules', 20],
        ['doLists',           40],
        ['doCodeBlocks',      50],
        ['doBlockQuotes',     60]
    ];

    // Span-level pipeline: transformations that occur *within*
    // block-level tags.
    this.span_gamut = [
        // Character escapes, code spans, and inline HTML in one shot.
        ['parseSpan',          -30],
        // Images must come before anchors, because ![foo][f] looks like
        // an anchor.
        ['doImages',            10],
        ['doAnchors',           20],
        // Auto links such as `<http://example.com/>` must come after
        // doAnchors, because < and > can delimit the URL of an inline
        // link like [this](<url>).
        ['doAutoLinks',         30],
        ['encodeAmpsAndAngles', 40],
        ['doItalicsAndBold',    50],
        ['doHardBreaks',        60]
    ];

    this.em_relist        = emphasisRelist(1);
    this.strong_relist    = emphasisRelist(2);
    this.em_strong_relist = emphasisRelist(3);
}
135
/**
 * Second-stage initialisation, called once after construction.
 * Prepares the emphasis regexes, builds the bracket/URL/escape patterns,
 * copies the global defaults onto the instance, sorts the gamuts by
 * priority and resets the per-document state.
 */
Markdown_Parser.prototype.init = function() {
    // No _initDetab() here: JavaScript string length is already based on
    // Unicode code units.
    this.prepareItalicsAndBold();

    // [porting note] The PHP original builds these two patterns with
    // atomic groups repeated up to nested_brackets_depth /
    // nested_url_parenthesis_depth. JavaScript has no atomic groups, so
    // simplified one-level patterns are used instead.

    // Link text: either one bracketed group or text without brackets.
    this.nested_brackets_re = '(?:\\[[^\\]]*\\]|[^\\[\\]]*)';

    // Link URL: either one parenthesised group or text without
    // parentheses. Neither alternative may contain whitespace (as in the
    // PHP pattern); otherwise the URL swallows an optional "title" that
    // follows it in `[text](url "title")`.
    this.nested_url_parenthesis_re = '(?:\\([^\\)\\s]*\\)|[^\\(\\)\\s]*)';

    // Character class matching any escapable character.
    var quoted = [];
    for(var i = 0; i < this.escape_chars.length; i++) {
        quoted.push(this._php_preg_quote(this.escape_chars.charAt(i)));
    }
    this.escape_chars_re = '[' + quoted.join('') + ']';

    // Change to ">" for HTML output.
    this.empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
    this.tab_width = MARKDOWN_TAB_WIDTH;

    // Change to `true` to disallow markup or entities.
    this.no_markup = false;
    this.no_entities = false;

    // Predefined urls and titles for reference links and images.
    this.predef_urls = {};
    this.predef_titles = {};

    // Sort document, block, and span gamut in ascending priority order.
    function cmp_gamut(a, b) {
        a = a[1]; b = b[1];
        return a > b ? 1 : a < b ? -1 : 0;
    }
    this.document_gamut.sort(cmp_gamut);
    this.block_gamut.sort(cmp_gamut);
    this.span_gamut.sort(cmp_gamut);

    // Internal hashes used during transformation.
    this.urls = {};
    this.titles = {};
    this.html_hashes = {};

    // Status flag to avoid invalid nesting.
    this.in_anchor = false;
};
192
/**
 * [porting note]
 * JavaScript regular expressions have no \A and \Z anchors, so a
 * multiline pattern cannot address the very start/end of the text.
 * As a substitute the text is framed with the control characters
 * STX (0x02) and ETX (0x03). A marker that is already present is not
 * added a second time.
 */
Markdown_Parser.prototype.__wrapSTXETX__ = function(text) {
    var STX = '\x02';
    var ETX = '\x03';
    var framed = (text.charAt(0) != STX) ? STX + text : text;
    if(framed.charAt(framed.length - 1) != ETX) {
        framed += ETX;
    }
    return framed;
};
204
/**
 * [porting note]
 * Removes a leading STX (0x02) and a trailing ETX (0x03), if present.
 */
Markdown_Parser.prototype.__unwrapSTXETX__ = function(text) {
    var bare = text;
    if(bare.charAt(0) == '\x02') {
        bare = bare.slice(1);
    }
    var lastIndex = bare.length - 1;
    if(bare.charAt(lastIndex) == '\x03') {
        bare = bare.slice(0, lastIndex);
    }
    return bare;
};
214
/**
 * Counterpart of PHP's preg_quote(): puts a backslash in front of every
 * character of `text` that has a special meaning in a regular
 * expression, so the result can be embedded in a pattern (including
 * inside a character class) and match the text literally.
 *
 * Quoted characters: / . * + ? | ( ) [ ] { } \ ^ $ -
 *
 * @param text string to quote
 * @return quoted string
 */
Markdown_Parser.prototype._php_preg_quote = function(text) {
    // The previous pattern ended in `\}\\`, i.e. it only matched the
    // two-character sequence "}\" and never a lone "}" or "\".
    return text.replace(/[\/.*+?|()\[\]{}\\^$\-]/g, '\\$&');
};
224
/**
 * Counterpart of PHP's str_repeat(): returns `str` concatenated `n`
 * times. A count of zero (or less) yields the empty string; previously
 * one copy of `str` was returned in that case.
 *
 * @param str string to repeat
 * @param n   number of repetitions
 * @return the repeated string
 */
Markdown_Parser.prototype._php_str_repeat = function(str, n) {
    var repeated = '';
    for(var i = 0; i < n; i++) {
        repeated += str;
    }
    return repeated;
};
232
/**
 * Counterpart of PHP's trim(): strips the characters in `charlist` from
 * both ends of `target`. Without a charlist (or with an empty one),
 * space, tab, newline and carriage return are stripped.
 *
 * `charlist` is inserted verbatim into a regex character class, so
 * characters that are special there must be escaped by the caller.
 */
Markdown_Parser.prototype._php_trim = function(target, charlist) {
    var run = "[" + (charlist || " \t\n\r") + "]*";
    var edges = new RegExp("^" + run + "|" + run + "$", "g");
    return target.replace(edges, "");
};
239
/**
 * Counterpart of PHP's rtrim(): strips the characters in `charlist`
 * from the end of `target`. Without a charlist (or with an empty one),
 * space, tab, newline and carriage return are stripped.
 *
 * `charlist` is inserted verbatim into a regex character class, so
 * characters that are special there must be escaped by the caller.
 */
Markdown_Parser.prototype._php_rtrim = function(target, charlist) {
    var tail = new RegExp("[" + (charlist || " \t\n\r") + "]*$", "g");
    return target.replace(tail, "");
};
246
/**
 * Counterpart of PHP's htmlspecialchars() with ENT_NOQUOTES: replaces
 * "&", "<" and ">" with their HTML entities and leaves quotes untouched.
 * An ampersand that already starts an entity is encoded again.
 */
Markdown_Parser.prototype._php_htmlspecialchars_ENT_NOQUOTES = function(str) {
    var entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };
    return str.replace(/[&<>]/g, function(ch) {
        return entities[ch];
    });
};
250
251
/**
 * Called before the transformation process starts to set up the parser
 * state: the URL and title tables start from the predefined entries, the
 * HTML hash table is emptied and the anchor flag is reset.
 */
Markdown_Parser.prototype.setup = function() {
    // Shallow copy of own properties. The working tables must not alias
    // predef_urls/predef_titles: link definitions found in a document are
    // written into this.urls/this.titles and would otherwise leak into
    // the predefined tables and into every later transform.
    function copyOf(source) {
        var copy = {};
        for(var key in source) {
            if(Object.prototype.hasOwnProperty.call(source, key)) {
                copy[key] = source[key];
            }
        }
        return copy;
    }

    // Reset per-document hashes.
    this.urls = copyOf(this.predef_urls);
    this.titles = copyOf(this.predef_titles);
    this.html_hashes = {};

    this.in_anchor = false;
};
264
/**
 * Called after the transformation process. Replaces the per-document
 * tables with fresh empty objects so they do not keep memory alive
 * unnecessarily.
 */
Markdown_Parser.prototype.teardown = function() {
    var tables = ['urls', 'titles', 'html_hashes'];
    for(var i = 0; i < tables.length; i++) {
        this[tables[i]] = {};
    }
};
274
/**
 * Main function. Performs some preprocessing on the input text and
 * passes it through the document gamut.
 *
 * @param text Markdown source
 * @return transformed text, terminated by a newline
 */
Markdown_Parser.prototype.transform = function(text) {
    this.setup();

    // Remove a leading BOM and every marker character (0x1A) in the
    // input. 0x1A is used internally in hash keys, so all occurrences
    // must go, not just the first. In a decoded JavaScript string the
    // BOM is the single character U+FEFF; the raw UTF-8 byte sequence is
    // still accepted for input that was read byte-wise.
    text = text.replace(/^\uFEFF|^\xEF\xBB\xBF|\x1A/g, "");

    // Standardize line endings:
    //   DOS to Unix and Mac to Unix
    text = text.replace(/\r\n?/g, "\n");

    // Make sure text ends with a couple of newlines:
    text += "\n\n";

    // Convert all tabs to spaces.
    text = this.detab(text);

    // Turn block-level HTML blocks into hash entries
    text = this.hashHTMLBlocks(text);

    // Strip every line consisting only of spaces (tabs are already
    // gone). This makes subsequent regexen easier to write, because we
    // can match consecutive blank lines with /\n+/ instead of something
    // contorted like /[ ]*\n+/ .
    text = text.replace(/^[ ]+$/mg, "");

    // Run document gamut methods.
    for(var i = 0; i < this.document_gamut.length; i++) {
        var method = this[this.document_gamut[i][0]];
        if(method) {
            text = method.call(this, text);
        }
        else {
            // A gamut entry without a matching method is reported and
            // skipped rather than aborting the transformation.
            console.log(this.document_gamut[i][0] + ' not implemented');
        }
    }

    this.teardown();

    return text + "\n";
};
319
/**
 * Replaces block-level HTML (block tags, <hr>, standalone comments and
 * processing instructions) with hash tokens so that later Markdown
 * passes leave the markup untouched. Returns the text unchanged when
 * `no_markup` is set.
 *
 * @param text document or block text
 * @return text with each matched HTML block replaced by
 *         "\n\n" + token + "\n\n"
 */
Markdown_Parser.prototype.hashHTMLBlocks = function(text) {
    if(this.no_markup) { return text; }

    var less_than_tab = this.tab_width - 1;

    // Hashify HTML blocks:
    // We only want to do this for block-level HTML tags, such as headers,
    // lists, and tables. That's because we still want to wrap <p>s around
    // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    // phrase emphasis, and spans. The list of tags we're looking for is
    // hard-coded:
    //
    // *  List "a" is made of tags which can be both inline or block-level.
    //    These will be treated block-level when the start tag is alone on
    //    its line, otherwise they're not matched here and will be taken as
    //    inline later.
    // *  List "b" is made of tags which are always block-level;

    var block_tags_a_re = 'ins|del';
    var block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|' +
                          'script|noscript|form|fieldset|iframe|math';

    // Regular expression for the content of a block tag.
    var nested_tags_level = 4;
    var attr =
        '(?:'                + // optional tag attributes
            '\\s'            + // starts with whitespace
            '(?:'            +
                '[^>"/]+'    + // text outside quotes
            '|'              +
                '/+(?!>)'    + // slash not followed by ">"
            '|'              +
                '"[^"]*"'    + // text inside double quotes (tolerate ">")
            '|'              +
                '\'[^\']*\'' + // text inside single quotes (tolerate ">")
            ')*'             +
        ')?';
    // `content` references the tag name through back-reference \2; it is
    // built by opening nested_tags_level groups and closing them again.
    var content =
        this._php_str_repeat(
            '(?:'                  +
                '[^<]+'            + // content without tag
            '|'                    +
                '<\\2'             + // nested opening tag
                attr               + // attributes
                '(?:'              +
                    '/>'           +
                '|'                +
                    '>',
            nested_tags_level
        )                          + // end of opening tag
        '.*?'                      + // last level nested tag content
        this._php_str_repeat(
                   '</\\2\\s*>'    + // closing nested tag
                ')'                +
                '|'                +
                    '<(?!/\\2\\s*>)' + // other tags with a different name
            ')*',
            nested_tags_level
        );

    // Same pattern for the group "a" branch, whose tag name is captured
    // in group 3. EVERY \2 must be rewritten: a string pattern would only
    // replace the first one, and in JavaScript a back-reference to a
    // group that did not participate matches the empty string, which
    // lets the content run past the block's own closing tag.
    var content2 = content.replace(/\\2/g, '\\3');

    // First, look for nested blocks, e.g.:
    //   <div>
    //     <div>
    //       tags for inner block must be indented.
    //     </div>
    //   </div>
    //
    // The outermost tags must start at the left margin for this to match, and
    // the inner nested divs must be indented.
    // We need to do this before the next, more liberal match, because the next
    // match will start at the first `<div>` and stop at the first `</div>`.
    var all = new RegExp('(?:' +
        '(?:'                  +
            '(?:\\n\\n)'       + // Starting after a blank line
            '|'                + // or
            '(?:\\x02)\\n?'    + // the beginning of the doc
        ')'                    +
        '('                    + // save in $1

        // Match from `\n<tag>` to `</tag>\n`, handling nested tags
        // in between.
            '[ ]{0,' + less_than_tab + '}' +
            '<(' + block_tags_b_re + ')'   + // start tag = $2
            attr + '>'                     + // attributes followed by >
            content                        + // content, support nesting
            '</\\2>'                       + // the matching end tag
            '[ ]*'                         + // trailing spaces
            '(?=\\n+|\\n*\\x03)'           + // followed by a newline or end of document

        '|' + // Special version for tags of group a.

            '[ ]{0,' + less_than_tab + '}' +
            '<(' + block_tags_a_re + ')'   + // start tag = $3
            attr + '>[ ]*\\n'              + // attributes followed by > and \n
            content2                       + // content, support nesting
            '</\\3>'                       + // the matching end tag
            '[ ]*'                         + // trailing spaces
            '(?=\\n+|\\n*\\x03)'           + // followed by a newline or end of document

        '|' + // Special case just for <hr />. It was easier to make a special
              // case than to make the other regex more complicated.

            '[ ]{0,' + less_than_tab + '}' +
            '<(hr)'                        + // start tag = $4
            attr                           + // attributes
            '/?>'                          + // the matching end tag
            '[ ]*'                         +
            '(?=\\n{2,}|\\n*\\x03)'        + // followed by a blank line or end of document

        '|' + // Special case for standalone HTML comments:

            '[ ]{0,' + less_than_tab + '}' +
            '(?:'                          + // PHP uses (?s: here; "." does not cross lines in JS
                '<!--.*?-->'               +
            ')'                            +
            '[ ]*'                         +
            '(?=\\n{2,}|\\n*\\x03)'        + // followed by a blank line or end of document

        '|' + // PHP and ASP-style processor instructions (<? and <%)

            '[ ]{0,' + less_than_tab + '}' +
            '(?:'                          + // PHP uses (?s: here; "." does not cross lines in JS
                '<([?%])'                  + // $5
                '.*?'                      +
                '\\5>'                     + // closed by the same character that opened it
            ')'                            +
            '[ ]*'                         +
            '(?=\\n{2,}|\\n*\\x03)'        + // followed by a blank line or end of document

        ')' +
    ')', 'mig');
    // JavaScript has no \A or \Z, so the text is framed with STX/ETX and
    // the pattern refers to \x02 / \x03 instead.

    var self = this;
    text = this.__wrapSTXETX__(text);
    text = text.replace(all, function(match, block) {
        var key = self.hashBlock(block);
        return "\n\n" + key + "\n\n";
    });
    text = this.__unwrapSTXETX__(text);
    return text;
};
465
/**
 * Called whenever a tag must be hashed when a function inserts an atomic
 * element in the text stream. Passing text through this function gives
 * a unique text-token which will be reverted back when calling unhash.
 *
 * The boundary argument specifies what character should be used to
 * surround the token. By convention, "B" is used for block elements that
 * need not be wrapped into paragraph tags at the end, ":" is used for
 * elements that are word separators and "X" is used in the general case.
 *
 * @param text     markup to store
 * @param boundary optional boundary character, defaults to "X"
 * @return the token that replaces the markup in the text stream
 */
Markdown_Parser.prototype.hashPart = function hashPart(text, boundary) {
    if('undefined' === typeof boundary) {
        boundary = 'X';
    }
    // Swap back any tag hash found in text so we do not have to `unhash`
    // multiple times at the end.
    text = this.unhash(text);

    // Then hash the block. The running counter is kept on the function
    // object itself (property `i`), reached through the function's own
    // name rather than `arguments.callee`, which throws in strict mode.
    if('undefined' === typeof hashPart.i) {
        hashPart.i = 0;
    }
    var key = boundary + "\x1A" + (++hashPart.i) + boundary;
    this.html_hashes[key] = text;
    return key; // String that will replace the tag.
};
492
/**
 * Convenience wrapper around hashPart that always uses the block-level
 * boundary character "B".
 */
Markdown_Parser.prototype.hashBlock = function(text) {
    var blockBoundary = 'B';
    return this.hashPart(text, blockBoundary);
};
499
/**
 * Strips link definitions from text and stores the URLs and titles in
 * this.urls / this.titles, keyed by the lower-cased link id.
 *
 * @param text document text
 * @return text with all link definitions removed
 */
Markdown_Parser.prototype.stripLinkDefinitions = function(text) {
    var less_than_tab = this.tab_width - 1;
    var self = this;
    // Link defs are in the form: ^[id]: url "optional title"
    text = this.__wrapSTXETX__(text);
    text = text.replace(new RegExp(
        // The STX sentinel added above sits in front of the first line,
        // so it has to be allowed (and consumed) here; otherwise a
        // definition on the very first line can never match.
        '^\\x02?'     +
        '[ ]{0,' + less_than_tab + '}\\[(.+)\\][ ]?:' + // id = $1
            '[ ]*'        +
                '\\n?'    + // maybe *one* newline
                '[ ]*'    +
            '(?:'         +
                '<(.+?)>' + // url = $2
            '|'           +
                '(\\S+?)' + // url = $3
            ')'           +
            '[ ]*'        +
            '\\n?'        + // maybe one newline
            '[ ]*'        +
            '(?:'         +
                // The PHP original asserts preceding whitespace with a
                // lookbehind here; that is not used in this port.
                '["\\(]'  +
                '(.*?)'   + // title = $4
                '["\\)]'  +
                '[ ]*'    +
            ')?'          + // title is optional
            '(?:\\n+|\\n*(?=\\x03))',
        'mg'), function(match, id, url2, url3, title) {
            var link_id = id.toLowerCase();
            var url = url2 ? url2 : url3;
            self.urls[link_id] = url;
            // `title` is undefined when the definition has none.
            self.titles[link_id] = title;
            return ''; // String that will replace the block
        }
    );
    text = this.__unwrapSTXETX__(text);
    return text;
};
542
/**
 * Runs the block gamut on a piece of text after hashing its raw HTML.
 *
 * Raw HTML has to be hashed again for every block, not only once at the
 * beginning of the whole document: hashed blocks can be part of list
 * items and may have been indented, in which case an earlier
 * hashHTMLBlocks pass would have seen them as code blocks.
 */
Markdown_Parser.prototype.runBlockGamut = function(text) {
    var withHashedHtml = this.hashHTMLBlocks(text);
    return this.runBasicBlockGamut(withHashedHtml);
};
555
/**
 * Applies every block gamut method in order, without hashing HTML
 * blocks first, then forms paragraphs. Useful when the HTML blocks are
 * known to be hashed already, as in the first whole-document pass.
 * A gamut entry that names a missing method is logged and skipped.
 */
Markdown_Parser.prototype.runBasicBlockGamut = function(text) {
    var result = text;
    for(var step = 0; step < this.block_gamut.length; step++) {
        var name = this.block_gamut[step][0];
        var handler = this[name];
        if(!handler) {
            console.log(name + ' not implemented');
            continue;
        }
        result = handler.call(this, result);
    }
    // Finally form paragraphs and restore hashed blocks.
    return this.formParagraphs(result);
};
575
/**
 * Replaces horizontal-rule lines (three or more `-`, `*` or `_`
 * markers, optionally separated by up to two spaces) with a hashed
 * <hr> element surrounded by newlines.
 */
Markdown_Parser.prototype.doHorizontalRules = function(text) {
    var parser = this;
    var rulePattern = new RegExp([
        '^[ ]{0,3}',  // Leading space
        '([-\\*_])',  // $1: First marker
        '(?:',        // Repeated marker group
        '[ ]{0,2}',   //   Zero, one, or two spaces.
        '\\1',        //   Marker character
        '){2,}',      // Group repeated at least twice
        '[ ]*',       // Trailing spaces
        '$'           // End of line.
    ].join(''), 'mg');

    function toRule() {
        return "\n" + parser.hashBlock("<hr" + parser.empty_element_suffix) + "\n";
    }

    return text.replace(rulePattern, toRule);
};
595
/**
 * Applies every span gamut method in order and returns the result.
 * A gamut entry that names a missing method is logged and skipped.
 */
Markdown_Parser.prototype.runSpanGamut = function(text) {
    var result = text;
    for(var step = 0; step < this.span_gamut.length; step++) {
        var name = this.span_gamut[step][0];
        var handler = this[name];
        if(!handler) {
            console.log(name + ' not implemented');
            continue;
        }
        result = handler.call(this, result);
    }
    return result;
};
611
/**
 * Turns two or more spaces at the end of a line into a hashed <br>
 * element followed by a newline.
 */
Markdown_Parser.prototype.doHardBreaks = function(text) {
    var parser = this;
    function toBreak() {
        return parser.hashPart("<br" + parser.empty_element_suffix + "\n");
    }
    return text.replace(/ {2,}\n/mg, toBreak);
};
622
623
/**
 * Turn Markdown link shortcuts into XHTML <a> tags.
 *
 * Handles, in this order: reference-style links `[text] [id]`,
 * inline links `[text](url "optional title")` and reference shortcuts
 * `[text]`. Returns the text unchanged when called while another anchor
 * is being built (`in_anchor`), which prevents nested links.
 *
 * @param text span-level text
 * @return text with links replaced by hashed <a> elements
 */
Markdown_Parser.prototype.doAnchors = function(text) {
    if (this.in_anchor) return text;
    this.in_anchor = true;

    var self = this;

    // Shared by the reference-style and the shortcut pass. For the
    // shortcut pass the pattern has no id group, so the fourth argument
    // is the match offset (a number) and the link text serves as the id.
    var _doAnchors_reference_callback = function(match, whole_match, link_text, link_id) {
        if(typeof(link_id) !== 'string' || link_id === '') {
            // for shortcut links like [this][] or [this].
            link_id = link_text;
        }

        // lower-case and turn every embedded newline into a space
        link_id = link_id.toLowerCase();
        link_id = link_id.replace(/[ ]?\n/g, ' ');

        var result;
        if ('undefined' !== typeof self.urls[link_id]) {
            var url = self.urls[link_id];
            url = self.encodeAttribute(url);

            result = "<a href=\"" + url + "\"";
            if ('undefined' !== typeof self.titles[link_id]) {
                var title = self.titles[link_id];
                title = self.encodeAttribute(title);
                result +=  " title=\"" + title + "\"";
            }

            link_text = self.runSpanGamut(link_text);
            result += ">" + link_text + "</a>";
            result = self.hashPart(result);
        }
        else {
            // Unknown id: leave the source text intact.
            result = whole_match;
        }
        return result;
    };

    //
    // First, handle reference-style links: [link text] [id]
    //
    // Cheap pre-check: the full pattern can only match where a "]" is
    // followed (after an optional space and optional newline + spaces)
    // by a "[". Unlike the earlier stripped-text heuristic this is an
    // exact necessary condition, so link text containing spaces or
    // nested brackets is no longer skipped.
    if (/\][ ]?(?:\n[ ]*)?\[/.test(text)) {
        text = text.replace(new RegExp(
            '('               + // wrap whole match in $1
              '\\['           +
                '(' + this.nested_brackets_re + ')' +  // link text = $2
              '\\]'           +

              '[ ]?'          + // one optional space
              '(?:\\n[ ]*)?'  + // one optional newline followed by spaces

              '\\['           +
                '(.*?)'       + // id = $3
              '\\]'           +
            ')',
            'mg'
        ), _doAnchors_reference_callback);
    }

    //
    // Next, inline-style links: [link text](url "optional title")
    //
    // Cheap pre-check: the pattern requires a literal "](".
    if (/\]\(/.test(text)) {
        text = text.replace(new RegExp(
            '('               + // wrap whole match in $1
              '\\['           +
                '(' + this.nested_brackets_re + ')' + // link text = $2
              '\\]'           +
              '\\('           + // literal paren
                '[ \\n]*'     +
                '(?:'         +
                    '<(.+?)>' + // href = $3
                '|'           +
                    '(' + this.nested_url_parenthesis_re + ')' + // href = $4
                ')'           +
                '[ \\n]*'     +
                '('           + // $5
                  '([\'"])'   + // quote char = $6
                  '(.*?)'     + // Title = $7
                  '\\6'       + // matching quote
                  '[ \\n]*'   + // ignore any spaces between closing quote and )
                ')?'          + // title is optional
              '\\)'           +
            ')',
            'mg'
        ), function(match, whole_match, link_text, url3, url4, x0, x1, title) {
            var url = url3 ? url3 : url4;

            url = self.encodeAttribute(url);

            var result = "<a href=\"" + url + "\"";
            if ('undefined' !== typeof title && title !== '') {
                title = self.encodeAttribute(title);
                result +=  " title=\"" + title + "\"";
            }

            // The span gamut is applied to the link text exactly once.
            link_text = self.runSpanGamut(link_text);
            result += ">" + link_text + "</a>";

            return self.hashPart(result);
        });
    }

    //
    // Last, handle reference-style shortcuts: [link text]
    // These must come last in case you've also got [link text][1]
    // or [link text](/foo)
    //
    text = text.replace(new RegExp(
        '('                  + // wrap whole match in $1
          '\\['              +
              '([^\\[\\]]+)' + // link text = $2; can't contain [ or ]
          '\\]'              +
        ')',
        'mg'
    ), _doAnchors_reference_callback);

    this.in_anchor = false;
    return text;
};
763
/**
 * Turn Markdown image shortcuts into <img> tags.
 *
 * Handles reference-style images `![alt text][id]` first, then inline
 * images `![alt text](url "optional title")`.
 *
 * @param text span-level text
 * @return text with images replaced by hashed <img> elements
 */
Markdown_Parser.prototype.doImages = function(text) {
    var self = this;

    //
    // First, handle reference-style labeled images: ![alt text][id]
    //
    // Cheap pre-checks (exact necessary conditions for the pattern
    // below): an image opener "![" and a "]" followed, after an optional
    // space and optional newline + spaces, by a "[".
    if (/!\[/.test(text) && /\][ ]?(?:\n[ ]*)?\[/.test(text)) {
        text = text.replace(new RegExp(
            '('              + // wrap whole match in $1
              '!\\['         +
                '(' + this.nested_brackets_re + ')' + // alt text = $2
              '\\]'          +

              '[ ]?'         + // one optional space
              '(?:\\n[ ]*)?' + // one optional newline followed by spaces

              '\\['          +
                '(.*?)'      + // id = $3
              '\\]'          +

            ')',
            'mg'
        ), function(match, whole_match, alt_text, link_id) {
            link_id = link_id.toLowerCase();

            if (typeof(link_id) !== 'string' || link_id === '') {
                link_id = alt_text.toLowerCase(); // for shortcut links like ![this][].
            }

            alt_text = self.encodeAttribute(alt_text);
            var result;
            if ('undefined' !== typeof self.urls[link_id]) {
                var url = self.encodeAttribute(self.urls[link_id]);
                result = "<img src=\"" + url + "\" alt=\"" + alt_text + "\"";
                if ('undefined' !== typeof self.titles[link_id]) {
                    var title = self.titles[link_id];
                    title = self.encodeAttribute(title);
                    result +=  " title=\"" + title + "\"";
                }
                result += self.empty_element_suffix;
                result = self.hashPart(result);
            }
            else {
                // If there's no such link ID, leave intact:
                result = whole_match;
            }

            return result;
        });
    }

    //
    // Next, handle inline images:  ![alt text](url "optional title")
    // Don't forget: encode * and _
    //
    // Cheap pre-check: the pattern requires "![" and a "]" followed by
    // at most one whitespace character and "(". The earlier
    // stripped-text heuristic kept the whitespace between URL and title
    // and therefore skipped every image that has a title.
    if (/!\[/.test(text) && /\]\s?\(/.test(text)) {
        text = text.replace(new RegExp(
            '('                + // wrap whole match in $1
              '!\\['           +
                '(' + this.nested_brackets_re + ')' + // alt text = $2
              '\\]'            +
              '\\s?'           + // One optional whitespace character
              '\\('            + // literal paren
                '[ \\n]*'      +
                '(?:'          +
                    '<(\\S*)>' + // src url = $3
                '|'            +
                    '(' + this.nested_url_parenthesis_re + ')' + // src url = $4
                ')'            +
                '[ \\n]*'      +
                '('            + // $5
                  '([\'"])'    + // quote char = $6
                  '(.*?)'      + // title = $7
                  '\\6'        + // matching quote
                  '[ \\n]*'    +
                ')?'           + // title is optional
              '\\)'            +
            ')',
            'mg'
        ), function(match, whole_match, alt_text, url3, url4, x5, x6, title) {
            var url = url3 ? url3 : url4;

            alt_text = self.encodeAttribute(alt_text);
            url = self.encodeAttribute(url);
            var result = "<img src=\"" + url + "\" alt=\"" + alt_text + "\"";
            if ('undefined' !== typeof title && title !== '') {
                title = self.encodeAttribute(title);
                result +=  " title=\"" + title + "\"";
            }
            result += self.empty_element_suffix;

            return self.hashPart(result);
        });
    }

    return text;
};
868
/**
 * Convert Setext-style and atx-style headers into hashed <hN> blocks.
 *
 * Setext-style:
 *    Header 1
 *    ========
 *
 *    Header 2
 *    --------
 *
 * atx-style:
 *    # Header 1
 *    ## Header 2
 *    ## Header 2 with closing hashes ##
 *    ...
 *    ###### Header 6
 *
 * @param {string} text - block-level text to scan
 * @returns {string} text with every header replaced by its hashed block
 */
Markdown_Parser.prototype.doHeaders = function(text) {
    var parser = this;

    // Renders one header: span-level markup is processed first, then the
    // finished element is hashed and surrounded by blank lines.
    function renderHeader(level, content) {
        var html = "<h" + level + ">" + parser.runSpanGamut(content) + "</h" + level + ">";
        return "\n" + parser.hashBlock(html) + "\n\n";
    }

    var setext_re = /^(.+?)[ ]*\n(=+|-+)[ ]*\n+/mg;
    var with_setext = text.replace(setext_re, function(whole, title, underline) {
        // Terrible hack: a lone "-" under a line that itself starts with
        // "-" is an empty list item, not an underline. Leave it untouched.
        if (underline == '-' && title.match(/^-(?: |$)/)) {
            return whole;
        }
        return renderHeader(underline.charAt(0) == '=' ? 1 : 2, title);
    });

    // $1 = run of leading #'s (its length is the level), $2 = header text.
    // Optional closing #'s are matched but not counted.
    var atx_re = /^(\#{1,6})[ ]*(.+?)[ ]*\#*\n+/mg;
    return with_setext.replace(atx_re, function(whole, hashes, title) {
        return renderHeader(hashes.length, title);
    });
};
913
/**
 * Form HTML ordered (numbered) and unordered (bulleted) lists.
 *
 * Two passes are made over the text: one looking for bulleted lists and
 * one for numbered lists. A list of one kind ends where a list of the
 * other kind starts at the same indentation.
 *
 * @param {string} text - block-level text to scan
 * @returns {string} text with each whole list replaced by a hashed
 *     <ul>/<ol> block
 */
Markdown_Parser.prototype.doLists = function(text) {
    var parser = this;
    var max_indent = this.tab_width - 1;

    // Re-usable patterns to match list item bullets and number markers.
    var ul_marker = '[\\*\\+-]';
    var ol_marker = '\\d+[\\.]';

    // Pattern source matching one entire list whose items use `own`
    // markers; `other` is the marker of the competing list kind.
    function wholeListPattern(own, other) {
        return [
            '(',                                 // $1 = whole list
              '(',                               // $2
                '([ ]{0,' + max_indent + '})',   // $3 = number of spaces
                '(' + own + ')',                 // $4 = first list item marker
                '[ ]+',
              ')',
              '[\\s\\S]+?',
              '(',                               // $5
                  '(?=\\x03)',                   // \z
                '|',
                  '\\n{2,}',
                  '(?=\\S)',
                  '(?!',                         // not followed by another item marker
                    '[ ]*',
                    own + '[ ]+',
                  ')',
                '|',
                  '(?=',                         // or: another kind of list follows
                    '\\n',
                    '\\3',                       // with the same indentation
                    other + '[ ]+',
                  ')',
              ')',
            ')'
        ].join('');
    }

    // Replacement callback: turns one matched list into a hashed block.
    function renderList(whole, list_body, lead, indent, first_marker) {
        var tag = first_marker.match(ul_marker) ? "ul" : "ol";
        var item_marker = tag == "ul" ? ul_marker : ol_marker;
        var items = parser.processListItems(list_body + "\n", item_marker);
        var block = parser.hashBlock("<" + tag + ">\n" + items + "</" + tag + ">");
        return "\n" + block + "\n\n";
    }

    var passes = [
        { own: ul_marker, other: ol_marker },
        { own: ol_marker, other: ul_marker }
    ];

    for (var p = 0; p < passes.length; p++) {
        var list_re = wholeListPattern(passes[p].own, passes[p].other);

        text = this.__wrapSTXETX__(text);

        // Nested lists (list_level > 0) may start on any line; top-level
        // lists must follow a newline or the start of the text, and the
        // prefix consumes that newline.
        var prefix = this.list_level ? '^' : '(?:(?=\\n)\\n|\\x02\\n?)';
        text = text.replace(new RegExp(prefix + list_re, "mg"), renderList);

        text = this.__unwrapSTXETX__(text);
    }

    return text;
};
993
994// var $list_level = 0;
995
/**
 * Process the contents of a single ordered or unordered list, splitting it
 * into individual list items.
 *
 * this.list_level tracks how deeply nested in lists we currently are: it
 * is incremented on entry and decremented on exit, so zero means "not in
 * a list". The distinction matters because outside a list, text such as
 *
 *    I recommend upgrading to version
 *    8. Oops, now this line is treated
 *    as a sub-list.
 *
 * must stay a single paragraph even though its second line starts with a
 * digit-period-space sequence, whereas inside a list (or sub-list) the
 * same line starts a sub-list. It is a kludge; this part of Markdown's
 * syntax is hard to parse perfectly.
 *
 * @param {string} list_str - text of the whole list
 * @param {string} marker_any_re - regex source matching this list's item
 *     markers
 * @returns {string} the list items rendered as <li>...</li> lines
 */
Markdown_Parser.prototype.processListItems = function(list_str, marker_any_re) {
    var parser = this;

    if (typeof this.list_level === 'undefined') {
        this.list_level = 0;
    }
    this.list_level++;

    // Trim trailing blank lines.
    list_str = this.__wrapSTXETX__(list_str);
    list_str = list_str.replace(/\n{2,}(?=\x03)/m, "\n");
    list_str = this.__unwrapSTXETX__(list_str);

    var item_re = new RegExp([
        '(\\n)?',                                // $1 = leading line
        '([ ]*)',                                // $2 = leading whitespace
        '(' + marker_any_re,                     // $3 = list marker and space
            '(?:[ ]+|(?=\\n))',                  //      space only required if item is not empty
        ')',
        '([\\s\\S]*?)',                          // $4 = list item text
        '(?:(\\n+(?=\\n))|\\n)',                 // $5 = trailing blank line
        '(?=\\n*(\\x03|\\2(' + marker_any_re + ')(?:[ ]+|(?=\\n))))'
    ].join(''), "gm");

    // Renders a single item. Items surrounded by blank lines, or holding
    // blank lines themselves, get full block-level processing; all others
    // get sub-list and span-level processing only.
    function renderItem(whole, leading_line, indent, marker_space, body, trailing_blank) {
        var html;
        if (leading_line || trailing_blank || body.match(/\n{2,}/)) {
            // Replace the marker with equivalent whitespace indentation.
            var padded = indent + parser._php_str_repeat(' ', marker_space.length) + body;
            html = parser.runBlockGamut(parser.outdent(padded) + "\n");
        }
        else {
            // Recursion for sub-lists.
            html = parser.doLists(parser.outdent(body));
            html = html.replace(/\n+$/m, '');
            html = parser.runSpanGamut(html);
        }
        return "<li>" + html + "</li>\n";
    }

    list_str = this.__wrapSTXETX__(list_str);
    list_str = list_str.replace(item_re, renderItem);
    list_str = this.__unwrapSTXETX__(list_str);

    this.list_level--;
    return list_str;
};
1066
/**
 * Process Markdown `<pre><code>` blocks.
 *
 * A code block is one or more consecutive lines indented by at least
 * tab_width spaces. Each block is outdented, HTML-escaped, stripped of
 * leading and trailing newlines, and replaced by a hashed
 * <pre><code> element.
 *
 * @param {string} text - block-level text to scan
 * @returns {string} text with indented code blocks replaced
 */
Markdown_Parser.prototype.doCodeBlocks = function(text) {
    var self = this;
    text = this.__wrapSTXETX__(text);
    text = text.replace(new RegExp(
        '(?:^|\\n\\n|(?=\\x02)\\n)?' +
        '('                          + // $1 = the code block -- one or more lines, starting with a space/tab
		  '(?:'                      +
          '(?=('                     +
            '[ ]{' + this.tab_width + ',}' +  // Lines must start with a tab or a tab-width of spaces
            '.*\\n+'                 +
          '))\\2'                    +
		  ')+'                       +
        ')'                          +
        '((?=^[ ]{0,' + this.tab_width + '}\\S)|(?:\\n*(?=\\x03)))',  // Lookahead for non-space at line-start, or end of doc
        'mg'
    ), function(match, codeblock) {
        codeblock = self.outdent(codeblock);
        codeblock = self._php_htmlspecialchars_ENT_NOQUOTES(codeblock);

        // Trim leading and trailing newlines. Without the "m" flag, ^ and $
        // anchor to the very start and end of the string. The previous
        // pattern used `(?=\x02)\n+`, a lookahead that can never be
        // followed by "\n", so leading newlines were silently kept.
        codeblock = codeblock.replace(/^\n+|\n+$/g, '');

        codeblock = "<pre><code>" + codeblock + "\n</code></pre>";
        return "\n\n" + self.hashBlock(codeblock) + "\n\n";
    });
    text = this.__unwrapSTXETX__(text);
    return text;
};
1101
/**
 * Build the markup for an inline code span. Called from handleSpanToken.
 *
 * @param {string} code - raw text found between the backtick markers
 * @returns {string} hashed `<code>` element holding the trimmed,
 *     HTML-escaped text
 */
Markdown_Parser.prototype.makeCodeSpan = function(code) {
    var trimmed = this._php_trim(code);
    var escaped = this._php_htmlspecialchars_ENT_NOQUOTES(trimmed);
    return this.hashPart("<code>" + escaped + "</code>");
};
1109
/**
 * Pre-compile the regular expressions used to find emphasis tokens.
 *
 * One expression is built for every combination of currently-open em
 * marker and strong marker, and stored in this.em_strong_prepared_relist
 * under the key 'rx_' + em + strong. Each expression is an alternation of
 * the combined em+strong pattern(s) whose key equals em + strong (if any),
 * followed by the em pattern and the strong pattern.
 */
Markdown_Parser.prototype.prepareItalicsAndBold = function() {
    var prepared = {};
    this.em_strong_prepared_relist = prepared;

    var em_list = this.em_relist;
    var strong_list = this.strong_relist;
    var combined_list = this.em_strong_relist;

    for (var e = 0; e < em_list.length; e++) {
        for (var s = 0; s < strong_list.length; s++) {
            var em_marker = em_list[e][0];
            var strong_marker = strong_list[s][0];

            // Allowed token expressions; combined markers come first.
            var alternatives = [];
            for (var c = 0; c < combined_list.length; c++) {
                if (em_marker + strong_marker == combined_list[c][0]) {
                    alternatives.push(combined_list[c][1]);
                }
            }
            alternatives.push(em_list[e][1], strong_list[s][1]);

            // Master expression for this context.
            prepared['rx_' + em_marker + strong_marker] =
                new RegExp('(' + alternatives.join('|') + ')');
        }
    }
};
1140
/**
 * Convert emphasis markers into <em> and <strong> elements.
 *
 * The text is scanned token by token using the expressions prepared by
 * prepareItalicsAndBold(); which expression is used depends on the
 * currently open em/strong markers. Two parallel stacks are kept:
 * token_stack holds the open markers (innermost first) and text_stack
 * holds the text collected at each nesting level (innermost first, with
 * one more entry than token_stack for the outermost text).
 *
 * @param {string} text - span-level text to process
 * @returns {string} text with emphasis replaced by hashed HTML; unmatched
 *     markers are put back as literal text
 */
Markdown_Parser.prototype.doItalicsAndBold = function(text) {
    var em = '';
    var strong = '';
    var three_char_em = false;
    var text_stack = [''];
    var token_stack = [];
    var token = '';
    var tag, span;

    // Put the innermost open marker back as literal text and merge the
    // innermost text level into its parent.
    function unwindInnermost() {
        text_stack[1] += token_stack.shift();
        var inner_text = text_stack.shift();
        text_stack[0] += inner_text;
    }

    while (true) {
        //
        // Get prepared regular expression for searching emphasis tokens
        // in current context.
        //
        var token_re = this.em_strong_prepared_relist['rx_' + em + strong];

        //
        // Each loop iteration searches for the next emphasis token
        // (emulating PHP's PREG_SPLIT_DELIM_CAPTURE).
        //
        var parts = text.match(token_re);
        if (parts) {
            // Derive the surrounding text from the match itself rather than
            // from the deprecated RegExp.leftContext/rightContext statics.
            var left = text.substring(0, parts.index);
            var right = text.substring(parts.index + parts[0].length);
            var pre = "";
            var marker = parts[1];
            // Groups from index 2 on come in (prefix, marker) pairs that
            // emulate lookbehind; the first non-empty prefix wins and is
            // kept as ordinary text.
            for (var mg = 2; mg < parts.length; mg += 2) {
                if ('undefined' !== typeof parts[mg] && parts[mg] != '') {
                    pre = parts[mg];
                    marker = parts[mg + 1];
                    break;
                }
            }
            text_stack[0] += (left + pre);
            token = marker;
            text = right;
        }
        else {
            text_stack[0] += text;
            token = '';
            text = '';
        }
        if (token == '') {
            // Reached end of text span: empty the stack without emitting
            // any more emphasis.
            while (token_stack.length > 0 && token_stack[0].length > 0) {
                unwindInnermost();
            }
            break;
        }

        var token_len = token.length;
        if (three_char_em) {
            // Reached closing marker while inside a three-char emphasis.
            if (token_len == 3) {
                // Three-char closing marker, close em and strong.
                token_stack.shift();
                span = text_stack.shift();
                span = this.runSpanGamut(span);
                span = "<strong><em>" + span + "</em></strong>";
                text_stack[0] += this.hashPart(span);
                em = '';
                strong = '';
            } else {
                // Other closing marker: close one em or strong and
                // change current token state to match the other.
                token_stack[0] = this._php_str_repeat(token.charAt(0), 3 - token_len);
                tag = token_len == 2 ? "strong" : "em";
                span = text_stack[0];
                span = this.runSpanGamut(span);
                span = "<" + tag + ">" + span + "</" + tag + ">";
                text_stack[0] = this.hashPart(span);
                if (tag == 'strong') { strong = ''; } else { em = ''; }
            }
            three_char_em = false;
        } else if (token_len == 3) {
            if (em != '') {
                // Reached closing marker for both em and strong: close the
                // two innermost levels one after the other.
                for (var i = 0; i < 2; ++i) {
                    var shifted_token = token_stack.shift();
                    tag = shifted_token.length == 2 ? "strong" : "em";
                    span = text_stack.shift();
                    span = this.runSpanGamut(span);
                    span = "<" + tag + ">" + span + "</" + tag + ">";
                    // Append to the parent level. Plain assignment here
                    // would discard the text collected before the span.
                    text_stack[0] += this.hashPart(span);
                    if (tag == 'strong') { strong = ''; } else { em = ''; }
                }
            } else {
                // Reached opening three-char emphasis marker. Push on token
                // stack; will be handled by the special condition above.
                em = token.charAt(0);
                strong = em + em;
                token_stack.unshift(token);
                text_stack.unshift('');
                three_char_em = true;
            }
        } else if (token_len == 2) {
            if (strong != '') {
                // Unwind any dangling emphasis marker:
                if (token_stack[0].length == 1) {
                    unwindInnermost();
                }
                // Closing strong marker:
                token_stack.shift();
                span = text_stack.shift();
                span = this.runSpanGamut(span);
                span = "<strong>" + span + "</strong>";
                text_stack[0] += this.hashPart(span);
                strong = '';
            } else {
                token_stack.unshift(token);
                text_stack.unshift('');
                strong = token;
            }
        } else {
            // Here token_len == 1
            if (em != '') {
                if (token_stack[0].length == 1) {
                    // Closing emphasis marker:
                    token_stack.shift();
                    span = text_stack.shift();
                    span = this.runSpanGamut(span);
                    span = "<em>" + span + "</em>";
                    text_stack[0] += this.hashPart(span);
                    em = '';
                } else {
                    text_stack[0] += token;
                }
            } else {
                token_stack.unshift(token);
                text_stack.unshift('');
                em = token;
            }
        }
    }
    return text_stack[0];
};
1285
1286
/**
 * Convert email-style blockquotes (lines starting with ">") into hashed
 * <blockquote> blocks.
 *
 * The quoted text has one level of ">" stripped, is run through the block
 * gamut recursively, and is then indented by two spaces for readability.
 * Content of <pre> elements must not receive that indentation, so it is
 * removed again from every line of each <pre>...</pre> section.
 *
 * @param {string} text - block-level text to scan
 * @returns {string} text with blockquotes replaced
 */
Markdown_Parser.prototype.doBlockQuotes = function(text) {
    var self = this;
    text = text.replace(new RegExp(
        '('              + // Wrap whole match in $1
          '(?:'          +
            '^[ ]*>[ ]?' + // ">" at the start of a line
              '.+\\n'    + // rest of the first line
            '(.+\\n)*'   + // subsequent consecutive lines
            '\\n*'       + // blanks
          ')+'           +
        ')',
        'mg'
    ), function(match, bq) {
        // trim one level of quoting - trim whitespace-only lines
        bq = bq.replace(/^[ ]*>[ ]?|^[ ]+$/mg, '');
        bq = self.runBlockGamut(bq);		// recurse

        bq = bq.replace(/^/mg, "  ");
        // These leading spaces cause problem with <pre> content,
        // so we need to fix that. The pattern previously used the escapes
        // of a string-built regex ("\\s") inside a regex literal, which
        // made it require a literal backslash and never match; the inner
        // replace also lacked the "g" flag needed to reach every line.
        bq = bq.replace(/(\s*<pre>[\s\S]+?<\/pre>)/g, function(match, pre) {
            return pre.replace(/^  /mg, '');
        });

        return "\n" + self.hashBlock("<blockquote>\n" + bq + "\n</blockquote>") + "\n\n";
    });
    return text;
};
1318
/**
 * Wrap paragraphs in <p> tags and restore hashed HTML blocks.
 *
 * The text is split on runs of two or more newlines. A chunk that is
 * exactly a block hash token is replaced by the stored block from
 * this.html_hashes; any other non-empty chunk is processed as span-level
 * text, wrapped in <p>...</p> (dropping leading spaces) and unhashed.
 *
 * [porting note] The PHP original additionally re-processes the content
 * of <div markdown="1"> blocks at this point; that step is not ported.
 *
 * Params:
 * text - string to process with html <p> tags
 *
 * Returns the processed chunks joined by blank lines.
 */
Markdown_Parser.prototype.formParagraphs = function(text) {
    var block_token_re = /^B\x1A[0-9]+B$/;

    // Strip leading and trailing lines.
    var body = this.__wrapSTXETX__(text);
    body = body.replace(/(?:\x02)\n+|\n+(?:\x03)/g, "");
    body = this.__unwrapSTXETX__(body);

    var grafs = body.split(/\n{2,}/m);

    for (var idx = 0; idx < grafs.length; idx++) {
        var chunk = grafs[idx];

        // [porting note] Stands in for PHP's PREG_SPLIT_NO_EMPTY: empty
        // chunks are left exactly as they are.
        if (chunk == "") {
            continue;
        }

        if (chunk.match(block_token_re)) {
            // Is a block: swap the token for the stored HTML.
            grafs[idx] = this.html_hashes[chunk];
            continue;
        }

        // Is a paragraph.
        var para = this.runSpanGamut(chunk);
        para = para.replace(/^([ ]*)/, "<p>") + "</p>";
        grafs[idx] = this.unhash(para);
    }

    return grafs.join("\n\n");
};
1402
/**
 * Encode text for use inside a double-quoted HTML attribute: ampersands
 * and angle brackets are handled by encodeAmpsAndAngles(), then every
 * double quote becomes &quot;. Single quotes are left alone, so the
 * result is *not* suitable for attributes enclosed in single quotes.
 *
 * @param {string} text - raw attribute value
 * @returns {string} encoded attribute value
 */
Markdown_Parser.prototype.encodeAttribute = function(text) {
    var encoded = this.encodeAmpsAndAngles(text);
    return encoded.replace(/"/g, '&quot;');
};
1412
/**
 * Smart processing for ampersands and angle brackets that need to
 * be encoded. Valid character entities are left alone unless the
 * no-entities mode is set.
 *
 * @param {string} text - text to encode
 * @returns {string} text with every bare "&" turned into "&amp;" and
 *     every "<" turned into "&lt;"
 */
Markdown_Parser.prototype.encodeAmpsAndAngles = function(text) {
    if (this.no_entities) {
        text = text.replace(/&/g, '&amp;');
    } else {
        // Ampersand-encoding based entirely on Nat Irons's Amputator
        // MT plugin: <http://bumppo.net/projects/amputator/>
        // The "g" flag is required: without it only the first bare
        // ampersand in the text would be encoded.
        text = text.replace(/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g, '&amp;');
    }
    // Encode remaining <'s
    text = text.replace(/</g, '&lt;');

    return text;
};
1431
/**
 * Turn automatic links written in angle brackets into hashed HTML.
 *
 *   <http://example.com/>   becomes an <a> element whose text is the URL
 *   <address@domain.foo>    is passed to encodeEmailAddress()
 *
 * Both patterns use the "g" flag so that every autolink in the text is
 * converted, not just the first of each kind.
 *
 * @param {string} text - span-level text to scan
 * @returns {string} text with autolinks replaced
 */
Markdown_Parser.prototype.doAutoLinks = function(text) {
    var self = this;
    text = text.replace(/<((https?|ftp|dict):[^'">\s]+)>/gi, function(match, address) {
        var url = self.encodeAttribute(address);
        var link = "<a href=\"" + url + "\">" + url + "</a>";
        return self.hashPart(link);
    });

    // Email addresses: <address@domain.foo>
    text = text.replace(new RegExp(
        '<'                            +
        '(?:mailto:)?'                 +
        '('                            +
            '(?:'                      +
                '[-!#$%&\'*+/=?^_`.{|}~\\w\\x80-\\xFF]+' +
            '|'                        +
                '".*?"'                +
            ')'                        +
            '\\@'                      +
            '(?:'                      +
                '[-a-z0-9\\x80-\\xFF]+(\\.[-a-z0-9\\x80-\\xFF]+)*\\.[a-z]+' +
            '|'                        +
                '\\[[\\d.a-fA-F:]+\\]' +  // IPv4 & IPv6
            ')'                        +
        ')'                            +
        '>',
        'gi'
    ), function(match, address) {
        var link = self.encodeEmailAddress(address);
        return self.hashPart(link);
    });

    return text;
};
1468
/**
 *  Input: an email address, e.g. "foo@example.com"
 *
 *  Output: the email address as a mailto link, with each character
 *      of the address encoded as either a decimal or hex entity, in
 *      the hopes of foiling most address harvesting spam bots. E.g.:
 *
 *    <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
 *        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
 *        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
 *        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
 *
 *   Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
 *   With some optimizations by Milian Wolff.
 *
 *   The choice of encoding per character is pseudo-random but
 *   deterministic: it is seeded with the CRC-32 of the "mailto:" URL.
 *
 *   The function is a named function expression so that it can cache the
 *   CRC table on itself without arguments.callee, which throws in strict
 *   mode and in ES modules.
 *
 * @param {string} addr - bare e-mail address (without "mailto:")
 * @returns {string} an <a> element with obfuscated href and text
 */
Markdown_Parser.prototype.encodeEmailAddress = function encodeEmailAddress(addr) {
    if ('undefined' === typeof encodeEmailAddress.crctable) {
        // The parentheses matter: without them .split() binds only to the
        // last string literal and the "table" degenerates into one long
        // string that is then indexed character by character.
        encodeEmailAddress.crctable = (
            "00000000 77073096 EE0E612C 990951BA 076DC419 706AF48F E963A535 9E6495A3 " +
            "0EDB8832 79DCB8A4 E0D5E91E 97D2D988 09B64C2B 7EB17CBD E7B82D07 90BF1D91 " +
            "1DB71064 6AB020F2 F3B97148 84BE41DE 1ADAD47D 6DDDE4EB F4D4B551 83D385C7 " +
            "136C9856 646BA8C0 FD62F97A 8A65C9EC 14015C4F 63066CD9 FA0F3D63 8D080DF5 " +
            "3B6E20C8 4C69105E D56041E4 A2677172 3C03E4D1 4B04D447 D20D85FD A50AB56B " +
            "35B5A8FA 42B2986C DBBBC9D6 ACBCF940 32D86CE3 45DF5C75 DCD60DCF ABD13D59 " +
            "26D930AC 51DE003A C8D75180 BFD06116 21B4F4B5 56B3C423 CFBA9599 B8BDA50F " +
            "2802B89E 5F058808 C60CD9B2 B10BE924 2F6F7C87 58684C11 C1611DAB B6662D3D " +
            "76DC4190 01DB7106 98D220BC EFD5102A 71B18589 06B6B51F 9FBFE4A5 E8B8D433 " +
            "7807C9A2 0F00F934 9609A88E E10E9818 7F6A0DBB 086D3D2D 91646C97 E6635C01 " +
            "6B6B51F4 1C6C6162 856530D8 F262004E 6C0695ED 1B01A57B 8208F4C1 F50FC457 " +
            "65B0D9C6 12B7E950 8BBEB8EA FCB9887C 62DD1DDF 15DA2D49 8CD37CF3 FBD44C65 " +
            "4DB26158 3AB551CE A3BC0074 D4BB30E2 4ADFA541 3DD895D7 A4D1C46D D3D6F4FB " +
            "4369E96A 346ED9FC AD678846 DA60B8D0 44042D73 33031DE5 AA0A4C5F DD0D7CC9 " +
            "5005713C 270241AA BE0B1010 C90C2086 5768B525 206F85B3 B966D409 CE61E49F " +
            "5EDEF90E 29D9C998 B0D09822 C7D7A8B4 59B33D17 2EB40D81 B7BD5C3B C0BA6CAD " +
            "EDB88320 9ABFB3B6 03B6E20C 74B1D29A EAD54739 9DD277AF 04DB2615 73DC1683 " +
            "E3630B12 94643B84 0D6D6A3E 7A6A5AA8 E40ECF0B 9309FF9D 0A00AE27 7D079EB1 " +
            "F00F9344 8708A3D2 1E01F268 6906C2FE F762575D 806567CB 196C3671 6E6B06E7 " +
            "FED41B76 89D32BE0 10DA7A5A 67DD4ACC F9B9DF6F 8EBEEFF9 17B7BE43 60B08ED5 " +
            "D6D6A3E8 A1D1937E 38D8C2C4 4FDFF252 D1BB67F1 A6BC5767 3FB506DD 48B2364B " +
            "D80D2BDA AF0A1B4C 36034AF6 41047A60 DF60EFC3 A867DF55 316E8EEF 4669BE79 " +
            "CB61B38C BC66831A 256FD2A0 5268E236 CC0C7795 BB0B4703 220216B9 5505262F " +
            "C5BA3BBE B2BD0B28 2BB45A92 5CB36A04 C2D7FFA7 B5D0CF31 2CD99E8B 5BDEAE1D " +
            "9B64C2B0 EC63F226 756AA39C 026D930A 9C0906A9 EB0E363F 72076785 05005713 " +
            "95BF4A82 E2B87A14 7BB12BAE 0CB61B38 92D28E9B E5D5BE0D 7CDCEFB7 0BDBDF21 " +
            "86D3D2D4 F1D4E242 68DDB3F8 1FDA836E 81BE16CD F6B9265B 6FB077E1 18B74777 " +
            "88085AE6 FF0F6A70 66063BCA 11010B5C 8F659EFF F862AE69 616BFFD3 166CCF45 " +
            "A00AE278 D70DD2EE 4E048354 3903B3C2 A7672661 D06016F7 4969474D 3E6E77DB " +
            "AED16A4A D9D65ADC 40DF0B66 37D83BF0 A9BCAE53 DEBB9EC5 47B2CF7F 30B5FFE9 " +
            "BDBDF21C CABAC28A 53B39330 24B4A3A6 BAD03605 CDD70693 54DE5729 23D967BF " +
            "B3667A2E C4614AB8 5D681B02 2A6F2B94 B40BBE37 C30C8EA1 5A05DF1B 2D02EF8D"
        ).split(' ');
    }
    var crctable = encodeEmailAddress.crctable;

    // Table-driven CRC-32 over the low byte of each UTF-16 code unit.
    function _crc32(str) {
        var crc = 0;
        crc = crc ^ (-1);
        for (var i = 0; i < str.length; ++i) {
            var y = (crc ^ str.charCodeAt(i)) & 0xff;
            crc = (crc >>> 8) ^ parseInt(crctable[y], 16);
        }
        return (crc ^ (-1)) >>> 0;
    }

    addr = "mailto:" + addr;
    var chars = addr.split('');
    var seed = Math.floor(Math.abs(_crc32(addr) / addr.length)); // # Deterministic seed.

    for (var i = 0; i < chars.length; i++) {
        var c = chars[i];
        var ord = c.charCodeAt(0);
        // Ignore non-ascii chars.
        if (ord < 128) {
            var r = (seed * (1 + i)) % 100; // Pseudo-random function.
            // roughly 10% raw, 45% hex, 45% dec
            // '@' *must* be encoded. I insist.
            if (r > 90 && c != '@') { /* do nothing */ }
            else if (r < 45) { chars[i] = '&#x' + ord.toString(16) + ';'; }
            else             { chars[i] = '&#' + ord.toString(10) + ';'; }
        }
    }

    var href = chars.join('');
    var text = chars.slice(7).join(''); // text without `mailto:`
    return "<a href=\"" + href + "\">" + text + "</a>";
};
1560
/**
 * Take the string str and parse it into tokens, hashing embedded HTML,
 * escaped characters and handling code spans.
 *
 * Tokens recognised: a backslash-escaped character, a run of backticks
 * (code span marker) and, unless this.no_markup is set, HTML comments,
 * processing instructions and regular tags (with or without attributes).
 * Each token is handed to handleSpanToken() together with the text that
 * follows it; text between tokens is copied through unchanged.
 *
 * @param {string} str - span-level text to tokenize
 * @returns {string} the text with every token replaced by whatever
 *     handleSpanToken() returned for it
 */
Markdown_Parser.prototype.parseSpan = function(str) {
    var output = '';

    var span_re = new RegExp(
            '('                          +
                '\\\\' + this.escape_chars_re +
            '|'                          +
                // The PHP original guards this with '(?<![`\\\\])'; the
                // backslash case is handled by hand in the loop below.
                '`+'                     + // code span marker
        (this.no_markup ? '' : (
            '|'                          +
                // [\s\S] instead of "." so these may span several lines,
                // as in the PHP original, which uses the "s" flag.
                '<!--[\\s\\S]*?-->'      + // comment
            '|'                          +
                '<\\?[\\s\\S]*?\\?>|<%[\\s\\S]*?%>' + // processing instruction
            '|'                          +
                '<[/!$]?[-a-zA-Z0-9:_]+' + // regular tags
                // Optional attributes. The PHP original uses atomic groups
                // here; a lookahead is not a substitute (it consumes
                // nothing, so tags with attributes could never match).
                // The inner alternative matches one character at a time
                // to avoid catastrophic backtracking on unterminated tags.
                '(?:'                    +
                    '\\s'                +
                    '(?:[^"\'>]|"[^"]*"|\'[^\']*\')*' +
                ')?'                     +
                '>'
        )) +
            ')'
    );

    while (true) {
        //
        // Each loop iteration searches for either the next tag, the next
        // opening code span marker, or the next escaped character.
        // Each token is then passed to handleSpanToken.
        //
        var parts = str.match(span_re);
        if (!parts) {
            output += str;
            break;
        }

        // Work from the match itself rather than the deprecated
        // RegExp.leftContext / lastMatch / rightContext statics.
        var token = parts[0];
        var before = str.substring(0, parts.index);
        var after = str.substring(parts.index + token.length);

        output += before;

        // Back quote but after backslash is to be ignored.
        if (token.charAt(0) == "`" && before.charAt(before.length - 1) == "\\") {
            output += token;
            str = after;
            continue;
        }

        var r = this.handleSpanToken(token, after);
        output += r[0];
        str = r[1];
    }
    return output;
};
1621
1622
1623/**
1624 * Handle $token provided by parseSpan by determining its nature and
1625 * returning the corresponding value that should replace it.
1626*/
1627Markdown_Parser.prototype.handleSpanToken = function(token, str) {
1628    //console.log([token, str]);
1629    switch (token.charAt(0)) {
1630        case "\\":
1631            return [this.hashPart("&#" + token.charCodeAt(1) + ";"), str];
1632        case "`":
1633            // Search for end marker in remaining text.
1634            if (str.match(new RegExp('^([\\s\\S]*?[^`])' + this._php_preg_quote(token) + '(?!`)([\\s\\S]*)$', 'm'))) {
1635                var code = RegExp.$1;
1636                str = RegExp.$2;
1637                var codespan = this.makeCodeSpan(code);
1638                return [this.hashPart(codespan), str];
1639            }
1640            return [token, str]; // return as text since no ending marker found.
1641        default:
1642            return [this.hashPart(token), str];
1643    }
1644};
1645
1646/**
1647 * Remove one level of line-leading tabs or spaces
1648 */
1649Markdown_Parser.prototype.outdent = function(text) {
1650    return text.replace(new RegExp('^(\\t|[ ]{1,' + this.tab_width + '})', 'mg'), '');
1651};
1652
1653
//# String length function for detab. `_initDetab` will create a function to
//# handle UTF-8 if the default function does not exist.
1656//var $utf8_strlen = 'mb_strlen';
1657
1658/**
1659 * Replace tabs with the appropriate amount of space.
1660 */
1661Markdown_Parser.prototype.detab = function(text) {
1662    // For each line we separate the line in blocks delemited by
1663    // tab characters. Then we reconstruct every line by adding the
1664    // appropriate number of space between each blocks.
1665    var self = this;
1666    return text.replace(/^.*\t.*$/mg, function(line) {
1667        //$strlen = $this->utf8_strlen; # strlen function for UTF-8.
1668        // Split in blocks.
1669        var blocks = line.split("\t");
1670        // Add each blocks to the line.
1671        line = blocks.shift(); // Do not add first block twice.
1672        for(var i = 0; i < blocks.length; i++) {
1673            var block = blocks[i];
1674            // Calculate amount of space, insert spaces, insert block.
1675            var amount = self.tab_width - line.length % self.tab_width;
1676            line += self._php_str_repeat(" ", amount) + block;
1677        }
1678        return line;
1679    });
1680};
1681
1682/**
1683 * Swap back in all the tags hashed by _HashHTMLBlocks.
1684 */
1685Markdown_Parser.prototype.unhash = function(text) {
1686    var self = this;
1687    return text.replace(/(.)\x1A[0-9]+\1/g, function(match) {
1688        return self.html_hashes[match];
1689    });
1690};
1691/*-------------------------------------------------------------------------*/
1692
1693/**
1694 * Constructor function. Initialize the parser object.
1695 */
1696function MarkdownExtra_Parser() {
1697
1698    // Prefix for footnote ids.
1699    this.fn_id_prefix = "";
1700
1701    // Optional title attribute for footnote links and backlinks.
1702    this.fn_link_title = MARKDOWN_FN_LINK_TITLE;
1703    this.fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
1704
1705    // Optional class attribute for footnote links and backlinks.
1706    this.fn_link_class = MARKDOWN_FN_LINK_CLASS;
1707    this.fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
1708
1709    // Predefined abbreviations.
1710    this.predef_abbr = {};
1711
1712    // Extra variables used during extra transformations.
1713    this.footnotes = {};
1714    this.footnotes_ordered = [];
1715    this.abbr_desciptions = {};
1716    this.abbr_word_re = '';
1717
1718    // Give the current footnote number.
1719    this.footnote_counter = 1;
1720
1721    // ### HTML Block Parser ###
1722
1723    // Tags that are always treated as block tags:
1724    this.block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
1725
1726    // Tags treated as block tags only if the opening tag is alone on it's line:
1727    this.context_block_tags_re = 'script|noscript|math|ins|del';
1728
1729    // Tags where markdown="1" default to span mode:
1730    this.contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
1731
1732    // Tags which must not have their contents modified, no matter where
1733    // they appear:
1734    this.clean_tags_re = 'script|math';
1735
1736    // Tags that do not need to be closed.
1737    this.auto_close_tags_re = 'hr|img';
1738
1739    // Redefining emphasis markers so that emphasis by underscore does not
1740    // work in the middle of a word.
1741    this.em_relist = [
1742        ['' , '(?:(^|[^\\*])(\\*)(?=[^\\*])|(^|[^a-zA-Z0-9_])(_)(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'],
1743        ['*', '((?:\\S|^)[^\\*])(\\*)(?!\\*)'],
1744        ['_', '((?:\\S|^)[^_])(_)(?![a-zA-Z0-9_])']
1745    ];
1746    this.strong_relist = [
1747        ['' , '(?:(^|[^\\*])(\\*\\*)(?=[^\\*])|(^|[^a-zA-Z0-9_])(__)(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'],
1748        ['**', '((?:\\S|^)[^\\*])(\\*\\*)(?!\\*)'],
1749        ['__', '((?:\\S|^)[^_])(__)(?![a-zA-Z0-9_])']
1750    ];
1751    this.em_strong_relist = [
1752        ['' , '(?:(^|[^\\*])(\\*\\*\\*)(?=[^\\*])|(^|[^a-zA-Z0-9_])(___)(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'],
1753        ['***', '((?:\\S|^)[^\\*])(\\*\\*\\*)(?!\\*)'],
1754        ['___', '((?:\\S|^)[^_])(___)(?![a-zA-Z0-9_])']
1755    ];
1756
1757    // Add extra escapable characters before parent constructor
1758    // initialize the table.
1759    this.escape_chars += ':|';
1760
1761    // Insert extra document, block, and span transformations.
1762    // Parent constructor will do the sorting.
1763    this.document_gamut.push(['doFencedCodeBlocks',  5]);
1764    this.document_gamut.push(['stripFootnotes',     15]);
1765    this.document_gamut.push(['stripAbbreviations', 25]);
1766    this.document_gamut.push(['appendFootnotes',    50]);
1767
1768    this.block_gamut.push(['doFencedCodeBlocks',  5]);
1769    this.block_gamut.push(['doTables',           15]);
1770    this.block_gamut.push(['doDefLists',         45]);
1771
1772    this.span_gamut.push(['doFootnotes',      5]);
1773    this.span_gamut.push(['doAbbreviations', 70]);
1774}
1775MarkdownExtra_Parser.prototype = new Markdown_Parser();
1776
1777/**
1778 * Setting up Extra-specific variables.
1779 */
1780MarkdownExtra_Parser.prototype.setup = function() {
1781    this.constructor.prototype.setup.call(this);
1782
1783    this.footnotes = {};
1784    this.footnotes_ordered = [];
1785    this.abbr_desciptions = {};
1786    this.abbr_word_re = '';
1787    this.footnote_counter = 1;
1788
1789    for(var abbr_word in this.predef_abbr) {
1790        var abbr_desc = this.predef_abbr[abbr_word];
1791        if(this.abbr_word_re != '') {
1792            this.abbr_word_re += '|';
1793        }
1794        this.abbr_word_re += this._php_preg_quote(abbr_word); // ?? str -> re?
1795        this.abbr_desciptions[abbr_word] = this._php_trim(abbr_desc);
1796    }
1797};
1798
1799/**
1800 * Clearing Extra-specific variables.
1801 */
1802MarkdownExtra_Parser.prototype.teardown = function() {
1803    this.footnotes = {};
1804    this.footnotes_ordered = [];
1805    this.abbr_desciptions = {};
1806    this.abbr_word_re = '';
1807
1808    this.constructor.prototype.teardown.call(this);
1809};
1810
1811
1812/**
1813 * Hashify HTML Blocks and "clean tags".
1814 *
1815 * We only want to do this for block-level HTML tags, such as headers,
1816 * lists, and tables. That's because we still want to wrap <p>s around
1817 * "paragraphs" that are wrapped in non-block-level tags, such as anchors,
1818 * phrase emphasis, and spans. The list of tags we're looking for is
1819 * hard-coded.
1820 *
1821 * This works by calling _HashHTMLBlocks_InMarkdown, which then calls
1822 * _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
1823 * attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back
1824 *  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
1825 * These two functions are calling each other. It's recursive!
1826 */
1827MarkdownExtra_Parser.prototype.hashHTMLBlocks = function(text) {
1828    //
1829    // Call the HTML-in-Markdown hasher.
1830    //
1831    var r = this._hashHTMLBlocks_inMarkdown(text);
1832    text = r[0];
1833
1834    return text;
1835};
1836
1837/**
1838 * Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
1839 *
1840 * *   $indent is the number of space to be ignored when checking for code
1841 *     blocks. This is important because if we don't take the indent into
1842 *     account, something like this (which looks right) won't work as expected:
1843 *
1844 *     <div>
1845 *         <div markdown="1">
1846 *         Hello World.  <-- Is this a Markdown code block or text?
1847 *         </div>  <-- Is this a Markdown code block or a real tag?
1848 *     <div>
1849 *
1850 *     If you don't like this, just don't indent the tag on which
1851 *     you apply the markdown="1" attribute.
1852 *
1853 * *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
1854 *     tag with that name. Nested tags supported.
1855 *
1856 * *   If $span is true, text inside must treated as span. So any double
1857 *     newline will be replaced by a single newline so that it does not create
1858 *     paragraphs.
1859 *
1860 * Returns an array of that form: ( processed text , remaining text )
1861 */
1862MarkdownExtra_Parser.prototype._hashHTMLBlocks_inMarkdown = function(text, indent, enclosing_tag_re, span) {
1863    if('undefined' === typeof indent) { indent = 0; }
1864    if('undefined' === typeof enclosing_tag_re) { enclosing_tag_re = ''; }
1865    if('undefined' === typeof span) { span = false; }
1866
1867    if(text === '') { return ['', '']; }
1868
1869    var matches;
1870
1871    // Regex to check for the presense of newlines around a block tag.
1872    var newline_before_re = /(?:^\n?|\n\n)*$/;
1873    var newline_after_re = new RegExp(
1874        '^'                 + // Start of text following the tag.
1875        '([ ]*<!--.*?-->)?' + // Optional comment.
1876        '[ ]*\\n'           , // Must be followed by newline.
1877        'm'
1878    );
1879
1880    // Regex to match any tag.
1881    var block_tag_re = new RegExp(
1882        '('                        + // $2: Capture hole tag.
1883            '</?'                  + // Any opening or closing tag.
1884                '('                + // Tag name.
1885                    this.block_tags_re         + '|' +
1886                    this.context_block_tags_re + '|' +
1887                    this.clean_tags_re         + '|' +
1888                    '(?!\\s)' + enclosing_tag_re +
1889                ')'                +
1890                '(?:'              +
1891                    '(?=[\\s"\'/a-zA-Z0-9])' + // Allowed characters after tag name.
1892                    '(?=('            +
1893                        '".*?"|'   + // Double quotes (can contain `>`)
1894                        '\'.*?\'|' + // Single quotes (can contain `>`)
1895                        '.+?'      + // Anything but quotes and `>`.
1896                    '))\\3*?'          +
1897                ')?'               +
1898            '>'                    + // End of tag.
1899        '|'                        +
1900            '<!--.*?-->'           + // HTML Comment
1901        '|'                        +
1902            '<\\?.*?\\?>|<%.*?%>'  + // Processing instruction
1903        '|'                        +
1904            '<!\\[CDATA\\[.*?\\]\\]>' + // CData Block
1905        '|'                        +
1906            // Code span marker
1907            '`+'                   +
1908        ( !span ? // If not in span.
1909        '|'                        +
1910            // Indented code block
1911            '(?:^[ ]*\\n|^|\\n[ ]*\\n)' +
1912            '[ ]{' + (indent + 4) + '}[^\\n]*\\n' +
1913            '(?='                  +
1914                '(?:[ ]{' + (indent + 4) + '}[^\\n]*|[ ]*)\\n' +
1915            ')*'                   +
1916        '|'                        +
1917            // Fenced code block marker
1918            '(?:^|\\n)'            +
1919            '[ ]{0,' + indent + '}~~~+[ ]*\\n'
1920        : '' ) + // # End (if not is span).
1921        ')',
1922        'm'
1923    );
1924
1925    var depth = 0;		// Current depth inside the tag tree.
1926    var parsed = "";	// Parsed text that will be returned.
1927
1928    //
1929    // Loop through every tag until we find the closing tag of the parent
1930    // or loop until reaching the end of text if no parent tag specified.
1931    //
1932    do {
1933        //
1934        // Split the text using the first $tag_match pattern found.
1935        // Text before  pattern will be first in the array, text after
1936        // pattern will be at the end, and between will be any catches made
1937        // by the pattern.
1938        //
1939        var parts_available = text.match(block_tag_re); //PREG_SPLIT_DELIM_CAPTURE
1940        var parts;
1941        if(!parts_available) {
1942            parts = [text];
1943        }
1944        else {
1945            parts = [RegExp.leftContext, RegExp.lastMatch, RegExp.rightContext];
1946        }
1947
1948        // If in Markdown span mode, add a empty-string span-level hash
1949        // after each newline to prevent triggering any block element.
1950        if(span) {
1951            var _void = this.hashPart("", ':');
1952            var newline = _void + "\n";
1953            parts[0] = _void + parts[0].replace(/\n/g, newline) + _void;
1954        }
1955
1956        parsed += parts[0]; // Text before current tag.
1957
1958        // If end of $text has been reached. Stop loop.
1959        if(!parts_available) {
1960            text = "";
1961            break;
1962        }
1963
1964        var tag  = parts[1]; // Tag to handle.
1965        text = parts[2]; // Remaining text after current tag.
1966        var tag_re = this._php_preg_quote(tag); // For use in a regular expression.
1967
1968        var t;
1969        var block_text;
1970        //
1971        // Check for: Code span marker
1972        //
1973
1974		if (tag.charAt(0) == "`") {
1975            // Find corresponding end marker.
1976            tag_re = this._php_preg_quote(tag);
1977			if (matches = text.match(new RegExp('^((?=(.+?|\\n[^\\n])))/1*?[^`]' + tag_re + '[^`]'))) {
1978                // End marker found: pass text unchanged until marker.
1979                parsed += tag + matches[0];
1980                text = text.substr(matches[0].length);
1981            }
1982            else {
1983                // Unmatched marker: just skip it.
1984                parsed += tag;
1985            }
1986        }
1987        //
1988        // Check for: Fenced code block marker.
1989        //
1990        else if(tag.match(new RegExp('^\\n?[ ]{0,' + (indent + 3) + '}~'))) {
1991            // Fenced code block marker: find matching end marker.
1992            tag_re = this._php_preg_quote(this._php_trim(tag));
1993            if(matches = text.match(new RegExp('^(?:.*\\n)+?[ ]{0,' + indent + '}' + tag_re + '[ ]*\\n'))) {
1994                // End marker found: pass text unchanged until marker.
1995                parsed += tag + matches[0];
1996                text = text.substr(matches[0].length);
1997            }
1998            else {
1999                // No end marker: just skip it.
2000                parsed += tag;
2001            }
2002        }
2003        //
2004        // Check for: Indented code block.
2005        //
2006        else if(tag.charAt(0) == "\n" || tag.charAt(0) == " ") {
2007            // Indented code block: pass it unchanged, will be handled
2008            // later.
2009            parsed += tag;
2010        }
2011        //
2012        // Check for: Opening Block level tag or
2013        //            Opening Context Block tag (like ins and del)
2014        //               used as a block tag (tag is alone on it's line).
2015        //
2016        else if (tag.match(new RegExp('^<(?:' + this.block_tags_re + ')\\b')) ||
2017            (
2018                tag.match(new RegExp('^<(?:' + this.context_block_tags_re + ')\\b')) &&
2019                parsed.match(newline_before_re) &&
2020                text.match(newline_after_re)
2021            )
2022        ) {
2023            // Need to parse tag and following text using the HTML parser.
2024            t = this._hashHTMLBlocks_inHTML(tag + text, this.hashBlock, true);
2025            block_text = t[0];
2026            text = t[1];
2027
2028            // Make sure it stays outside of any paragraph by adding newlines.
2029            parsed += "\n\n" + block_text + "\n\n";
2030        }
2031        //
2032        // Check for: Clean tag (like script, math)
2033        //            HTML Comments, processing instructions.
2034        //
2035        else if(
2036            tag.match(new RegExp('^<(?:' + this.clean_tags_re + ')\\b')) ||
2037            tag.charAt(1) == '!' || tag.charAt(1) == '?'
2038        ) {
2039            // Need to parse tag and following text using the HTML parser.
2040            // (don't check for markdown attribute)
2041            t = this._hashHTMLBlocks_inHTML(tag + text, this.hashClean, false);
2042            block_text = t[0];
2043            text = t[1];
2044
2045            parsed += block_text;
2046        }
2047        //
2048        // Check for: Tag with same name as enclosing tag.
2049        //
2050        else if (enclosing_tag_re !== '' &&
2051            // Same name as enclosing tag.
2052            tag.match(new RegExp('^</?(?:' + enclosing_tag_re + ')\\b'))
2053        ) {
2054            //
2055            // Increase/decrease nested tag count.
2056            //
2057            if (tag.charAt(1) == '/') depth--;
2058            else if (tag.charAt(tag.length - 2) != '/') depth++;
2059
2060            if(depth < 0) {
2061                //
2062                // Going out of parent element. Clean up and break so we
2063                // return to the calling function.
2064                //
2065                text = tag + text;
2066                break;
2067            }
2068
2069            parsed += tag;
2070        }
2071        else {
2072            parsed += tag;
2073        }
2074    } while(depth >= 0);
2075
2076    return [parsed, text];
2077};
2078
2079/**
2080 * Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
2081 *
2082 * *   Calls $hash_method to convert any blocks.
2083 * *   Stops when the first opening tag closes.
2084 * *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
2085 *     (it is not inside clean tags)
2086 *
2087 * Returns an array of that form: ( processed text , remaining text )
2088 */
2089MarkdownExtra_Parser.prototype._hashHTMLBlocks_inHTML = function(text, hash_method, md_attr) {
2090    if(text === '') return ['', ''];
2091
2092    var matches;
2093
2094    // Regex to match `markdown` attribute inside of a tag.
2095    var markdown_attr_re = new RegExp(
2096        '\\s*'           + // Eat whitespace before the `markdown` attribute
2097        'markdown'       +
2098        '\\s*=\\s*'      +
2099        '(?:'            +
2100            '(["\'])'    + // $1: quote delimiter
2101            '(.*?)'      + // $2: attribute value
2102            '\\1'        + // matching delimiter
2103        '|'              +
2104            '([^\\s>]*)' + // $3: unquoted attribute value
2105        ')'              +
2106        '()'               // $4: make $3 always defined (avoid warnings)
2107    );
2108
2109    // Regex to match any tag.
2110    var tag_re = new RegExp(
2111        '('                           + // $2: Capture hole tag.
2112            '</?'                     + // Any opening or closing tag.
2113                '[\\w:$]+'            + // Tag name.
2114                '(?:'                 +
2115                    '(?=[\\s"\'/a-zA-Z0-9])' + // Allowed characters after tag name.
2116                    '(?:'             +
2117                    '(?=('            +
2118                        '".*?"|'      + // Double quotes (can contain `>`)
2119                        '\'.*?\'|'    + // Single quotes (can contain `>`)
2120                        '.+?'         + // Anything but quotes and `>`.
2121                    '))\\4'           +
2122                    ')*?'             +
2123                ')?'                  +
2124            '>'                       + // End of tag.
2125        '|'                           +
2126            '<!--.*?-->'              + // HTML Comment
2127        '|'                           +
2128            '<\\?.*?\\?>|<%.*?%>'     + // Processing instruction
2129        '|'                           +
2130            '<!\\[CDATA\\[.*?\\]\\]>' + // CData Block
2131        ')'
2132    );
2133
2134    var original_text = text; // Save original text in case of faliure.
2135
2136    var depth      = 0;  // Current depth inside the tag tree.
2137    var block_text = ""; // Temporary text holder for current text.
2138    var parsed     = ""; // Parsed text that will be returned.
2139
2140    //
2141    // Get the name of the starting tag.
2142    // (This pattern makes $base_tag_name_re safe without quoting.)
2143    //
2144    var base_tag_name_re = "";
2145    if(matches = text.match(/^<([\w:$]*)\b/)) {
2146        base_tag_name_re = matches[1];
2147    }
2148
2149    //
2150    // Loop through every tag until we find the corresponding closing tag.
2151    //
2152    do {
2153        //
2154        // Split the text using the first $tag_match pattern found.
2155        // Text before  pattern will be first in the array, text after
2156        // pattern will be at the end, and between will be any catches made
2157        // by the pattern.
2158        //
2159        var parts_available = text.match(tag_re); //PREG_SPLIT_DELIM_CAPTURE);
2160        // If end of $text has been reached. Stop loop.
2161        if(!parts_available) {
2162            //
2163            // End of $text reached with unbalenced tag(s).
2164            // In that case, we return original text unchanged and pass the
2165            // first character as filtered to prevent an infinite loop in the
2166            // parent function.
2167            //
2168            return [original_text.charAt(0), original_text.substr(1)];
2169        }
2170        var parts = [RegExp.leftContext, RegExp.lastMatch, RegExp.rightContext];
2171
2172        block_text += parts[0]; // Text before current tag.
2173        var tag     = parts[1]; // Tag to handle.
2174        text        = parts[2]; // Remaining text after current tag.
2175
2176        //
2177        // Check for: Auto-close tag (like <hr/>)
2178        //			 Comments and Processing Instructions.
2179        //
2180        if(tag.match(new RegExp('^</?(?:' + this.auto_close_tags_re + ')\\b')) ||
2181            tag.charAt(1) == '!' || tag.charAt(1) == '?')
2182        {
2183            // Just add the tag to the block as if it was text.
2184            block_text += tag;
2185        }
2186        else {
2187            //
2188            // Increase/decrease nested tag count. Only do so if
2189            // the tag's name match base tag's.
2190            //
2191            if (tag.match(new RegExp('^</?' + base_tag_name_re + '\\b'))) {
2192                if(tag.charAt(1) == '/') { depth--; }
2193                else if(tag.charAt(tag.length - 2) != '/') { depth++; }
2194            }
2195
2196            //
2197            // Check for `markdown="1"` attribute and handle it.
2198            //
2199            var attr_m;
2200            if(md_attr &&
2201                (attr_m = tag.match(markdown_attr_re)) &&
2202                (attr_m[2] + attr_m[3]).match(/^1|block|span$/))
2203            {
2204                // Remove `markdown` attribute from opening tag.
2205                tag = tag.replace(markdown_attr_re, '');
2206
2207                // Check if text inside this tag must be parsed in span mode.
2208                this.mode = attr_m[2] + attr_m[3];
2209                var span_mode = this.mode == 'span' || this.mode != 'block' &&
2210                    tag.match(new RegExp('^<(?:' + this.contain_span_tags_re + ')\\b'));
2211
2212                // Calculate indent before tag.
2213                var indent;
2214                if (matches = block_text.match(/(?:^|\n)( *?)(?! ).*?$/)) {
2215                    //var strlen = this.utf8_strlen;
2216                    indent = matches[1].length; //strlen(matches[1], 'UTF-8');
2217                } else {
2218                    indent = 0;
2219                }
2220
2221                // End preceding block with this tag.
2222                block_text += tag;
2223                parsed += hash_method.call(this, block_text);
2224
2225                // Get enclosing tag name for the ParseMarkdown function.
2226                // (This pattern makes $tag_name_re safe without quoting.)
2227                matches = tag.match(/^<([\w:$]*)\b/);
2228                var tag_name_re = matches[1];
2229
2230                // Parse the content using the HTML-in-Markdown parser.
2231                var t = this._hashHTMLBlocks_inMarkdown(text, indent, tag_name_re, span_mode);
2232                block_text = t[0];
2233                text = t[1];
2234
2235                // Outdent markdown text.
2236                if(indent > 0) {
2237                    block_text = block_text.replace(new RegExp('/^[ ]{1,' + indent + '}', 'm'), "");
2238                }
2239
2240                // Append tag content to parsed text.
2241                if (!span_mode) { parsed += "\n\n" + block_text + "\n\n"; }
2242                else { parsed += block_text; }
2243
2244                // Start over a new block.
2245                block_text = "";
2246            }
2247            else {
2248                block_text += tag;
2249            }
2250        }
2251
2252    } while(depth > 0);
2253
2254    //
2255    // Hash last block text that wasn't processed inside the loop.
2256    //
2257    parsed += hash_method.call(this, block_text);
2258
2259    return [parsed, text];
2260};
2261
2262
2263/**
2264 * Called whenever a tag must be hashed when a function insert a "clean" tag
2265 * in $text, it pass through this function and is automaticaly escaped,
2266 * blocking invalid nested overlap.
2267 */
2268MarkdownExtra_Parser.prototype.hashClean = function(text) {
2269    return this.hashPart(text, 'C');
2270};
2271
2272
2273/**
2274 * Redefined to add id attribute support.
2275 */
2276MarkdownExtra_Parser.prototype.doHeaders = function(text) {
2277    var self = this;
2278
2279    function _doHeaders_attr(attr) {
2280        if('undefined' === typeof attr || attr == "") {  return ""; }
2281        return " id=\"" + attr + "\"";
2282    }
2283
2284    // Setext-style headers:
2285    //    Header 1  {#header1}
2286    //    ========
2287    //
2288    //    Header 2  {#header2}
2289    //    --------
2290
2291    text = text.replace(new RegExp(
2292        '(^.+?)'                              + // $1: Header text
2293        '(?:[ ]+\\{\\#([-_:a-zA-Z0-9]+)\\})?' + // $2: Id attribute
2294        '[ ]*\\n(=+|-+)[ ]*\\n+',               // $3: Header footer
2295         'mg'
2296    ), function(match, span, id, line) {
2297       //console.log(match);
2298        if(line == '-' && span.match(/^- /)) {
2299            return match;
2300        }
2301        var level = line.charAt(0) == '=' ? 1 : 2;
2302        var attr = _doHeaders_attr(id);
2303        var block = "<h" + level + attr + ">" + self.runSpanGamut(span) + "</h" + level + ">";
2304        return "\n" + self.hashBlock(block)  + "\n\n";
2305    });
2306
2307    // atx-style headers:
2308    //    # Header 1        {#header1}
2309    //    ## Header 2       {#header2}
2310    //    ## Header 2 with closing hashes ##  {#header3}
2311    //    ...
2312    //    ###### Header 6   {#header2}
2313
2314    text = text.replace(new RegExp(
2315        '^(\\#{1,6})' + // $1 = string of #\'s
2316        '[ ]*'        +
2317        '(.+?)'       + // $2 = Header text
2318        '[ ]*'        +
2319        '\\#*'        + // optional closing #\'s (not counted)
2320        '(?:[ ]+\\{\\#([-_:a-zA-Z0-9]+)\\})?' + // id attribute
2321        '\\n+',
2322        'mg'
2323    ), function(match, hashes, span, id) {
2324        //console.log(match);
2325        var level = hashes.length;
2326        var attr = _doHeaders_attr(id);
2327        var block = "<h" + level + attr + ">" + self.runSpanGamut(span) + "</h" + level + ">";
2328        return "\n" + self.hashBlock(block) + "\n\n";
2329    });
2330
2331    return text;
2332};
2333
2334/**
2335 * Form HTML tables.
2336 */
2337MarkdownExtra_Parser.prototype.doTables = function(text) {
2338    var self = this;
2339
2340    var less_than_tab = this.tab_width - 1;
2341
2342    var _doTable_callback = function(match, head, underline, content) {
2343        //console.log(match);
2344        // Remove any tailing pipes for each line.
2345        head = head.replace(/[|] *$/m, '');
2346        underline = underline.replace(/[|] *$/m, '');
2347        content = content.replace(/[|] *$/m, '');
2348
2349        var attr = [];
2350
2351        // Reading alignement from header underline.
2352        var separators = underline.split(/[ ]*[|][ ]*/);
2353        var n;
2354        for(n = 0; n < separators.length; n++) {
2355            var s = separators[n];
2356            if (s.match(/^ *-+: *$/))       { attr[n] = ' align="right"'; }
2357            else if (s.match(/^ *:-+: *$/)) { attr[n] = ' align="center"'; }
2358            else if (s.match(/^ *:-+ *$/))  { attr[n] = ' align="left"'; }
2359            else                            { attr[n] = ''; }
2360        }
2361
2362        // Parsing span elements, including code spans, character escapes,
2363        // and inline HTML tags, so that pipes inside those gets ignored.
2364        head = self.parseSpan(head);
2365        var headers = head.split(/ *[|] */);
2366        var col_count = headers.length;
2367
2368        // Write column headers.
2369        var text = "<table>\n";
2370        text += "<thead>\n";
2371        text += "<tr>\n";
2372        for(n = 0; n < headers.length; n++) {
2373            var header = headers[n];
2374            text += "  <th" + attr[n] + ">" + self.runSpanGamut(self._php_trim(header)) + "</th>\n";
2375        }
2376        text += "</tr>\n";
2377        text += "</thead>\n";
2378
2379        // Split content by row.
2380        var rows = self._php_trim(content, "\n").split("\n");
2381
2382        text += "<tbody>\n";
2383        for(var i = 0; i < rows.length; i++) {
2384            var row = rows[i];
2385            // Parsing span elements, including code spans, character escapes,
2386            // and inline HTML tags, so that pipes inside those gets ignored.
2387            row = self.parseSpan(row);
2388
2389            // Split row by cell.
2390            var row_cells = row.split(/ *[|] */, col_count);
2391            while(row_cells.length < col_count) { row_cells.push(''); }
2392
2393            text += "<tr>\n";
2394            for(n = 0; n < row_cells.length; n++) {
2395                var cell = row_cells[n];
2396                text += "  <td" + attr[n] + ">" + self.runSpanGamut(self._php_trim(cell)) + "</td>\n";
2397            }
2398            text += "</tr>\n";
2399        }
2400        text += "</tbody>\n";
2401        text += "</table>";
2402
2403        return self.hashBlock(text) + "\n";
2404    };
2405
2406    text = this.__wrapSTXETX__(text);
2407
2408    //
2409    // Find tables with leading pipe.
2410    //
2411    //	| Header 1 | Header 2
2412    //	| -------- | --------
2413    //	| Cell 1   | Cell 2
2414    //	| Cell 3   | Cell 4
2415    //
2416    text = text.replace(new RegExp(
2417        '^'                            + // Start of a line
2418        '[ ]{0,' + less_than_tab + '}' + // Allowed whitespace.
2419        '[|]'                          + // Optional leading pipe (present)
2420        '(.+)\\n'                      + // $1: Header row (at least one pipe)
2421
2422        '[ ]{0,' + less_than_tab + '}' + // Allowed whitespace.
2423        '[|]([ ]*[-:]+[-| :]*)\\n'     + // $2: Header underline
2424
2425        '('                            + // $3: Cells
2426            '(?:'                      +
2427                '[ ]*'                 + // Allowed whitespace.
2428                '[|].*\\n'             + // Row content.
2429            ')*'                       +
2430        ')'                            +
2431        '(?=\\n|\\x03)'                , // Stop at final double newline.
2432        'mg'
2433    ), function(match, head, underline, content) {
2434        // Remove leading pipe for each row.
2435        content = content.replace(/^ *[|]/m, '');
2436
2437        return _doTable_callback.call(this, match, head, underline, content);
2438    });
2439
2440    //
2441    // Find tables without leading pipe.
2442    //
2443    //	Header 1 | Header 2
2444    //	-------- | --------
2445    //	Cell 1   | Cell 2
2446    //	Cell 3   | Cell 4
2447    //
2448    text = text.replace(new RegExp(
2449        '^'                             + // Start of a line
2450        '[ ]{0,' + less_than_tab + '}'  + // Allowed whitespace.
2451        '(\\S.*[|].*)\\n'               + // $1: Header row (at least one pipe)
2452
2453        '[ ]{0,' + less_than_tab + '}'  + // Allowed whitespace.
2454        '([-:]+[ ]*[|][-| :]*)\\n'      + // $2: Header underline
2455
2456        '('                             + // $3: Cells
2457            '(?:'                       +
2458                '.*[|].*\\n'            + // Row content
2459            ')*'                        +
2460        ')'                             +
2461        '(?=\\n|\\x03)'                 , // Stop at final double newline.
2462        'mg'
2463    ), _doTable_callback);
2464
2465    text = this.__unwrapSTXETX__(text);
2466
2467    return text;
2468};
2469
2470/**
2471 * Form HTML definition lists.
2472 */
2473MarkdownExtra_Parser.prototype.doDefLists = function(text) {
2474    var self = this;
2475
2476    var less_than_tab = this.tab_width - 1;
2477
2478    // Re-usable pattern to match any entire dl list:
2479    var whole_list_re = '(?:'     +
2480        '('                       + // $1 = whole list
2481          '('                     + // $2
2482            '[ ]{0,' + less_than_tab + '}' +
2483            '((?:[ \\t]*\\S.*\\n)+)' + // $3 = defined term
2484                                       // [porting note] Original regex from PHP is
2485                                       // (?>.*\S.*\n), which matches a line with at
2486                                       // least one non-space character. Change the
2487                                       // first .* to [ \t]* stops unneccessary
2488                                       // backtracking hence improves performance
2489            '\\n?'                +
2490            '[ ]{0,' + less_than_tab + '}:[ ]+' + // colon starting definition
2491          ')'                     +
2492          '([\\s\\S]+?)'          +
2493          '('                     + // $4
2494              '(?=\\0x03)'        + // \z
2495            '|'                   +
2496              '(?='               + // [porting note] Our regex will consume leading
2497                                    // newline characters so we will leave the newlines
2498                                    // here for the next definition
2499                '\\n{2,}'         +
2500                '(?=\\S)'         +
2501                '(?!'             + // Negative lookahead for another term
2502                  '[ ]{0,' + less_than_tab + '}' +
2503                  '(?:\\S.*\\n)+?' + // defined term
2504                  '\\n?'          +
2505                  '[ ]{0,' + less_than_tab + '}:[ ]+' + // colon starting definition
2506                ')'               +
2507                '(?!'             + // Negative lookahead for another definition
2508                  '[ ]{0,' + less_than_tab + '}:[ ]+' + // colon starting definition
2509                ')'               +
2510              ')'                 +
2511          ')'                     +
2512        ')'                       +
2513    ')'; // mx
2514
2515    text = this.__wrapSTXETX__(text);
2516    text = text.replace(new RegExp(
2517        '(\\x02\\n?|\\n\\n)' +
2518        whole_list_re, 'mg'
2519    ), function(match, pre, list) {
2520        //console.log(match);
2521        // Re-usable patterns to match list item bullets and number markers:
2522        // [portiong note] changed to list = $2 in order to reserve previously \n\n.
2523
2524        // Turn double returns into triple returns, so that we can make a
2525        // paragraph for the last item in a list, if necessary:
2526        var result = self._php_trim(self.processDefListItems(list));
2527        result = "<dl>\n" + result + "\n</dl>";
2528        return pre + self.hashBlock(result) + "\n\n";
2529    });
2530    text = this.__unwrapSTXETX__(text);
2531
2532    return text;
2533};
2534
2535/**
2536 * Process the contents of a single definition list, splitting it
2537 * into individual term and definition list items.
2538 */
2539MarkdownExtra_Parser.prototype.processDefListItems = function(list_str) {
2540    var self = this;
2541
2542    var less_than_tab = this.tab_width - 1;
2543
2544    list_str = this.__wrapSTXETX__(list_str);
2545
2546    // trim trailing blank lines:
2547    list_str = list_str.replace(/\n{2,}(?=\\x03)/, "\n");
2548
2549    // Process definition terms.
2550    list_str = list_str.replace(new RegExp(
2551        '(\\x02\\n?|\\n\\n+)'              + // leading line
2552        '('                                + // definition terms = $1
2553            '[ ]{0,' + less_than_tab + '}' + // leading whitespace
2554            '(?![:][ ]|[ ])'               + // negative lookahead for a definition
2555                                             //   mark (colon) or more whitespace.
2556            '(?:\\S.*\\n)+?'               + // actual term (not whitespace).
2557        ')'                                +
2558        '(?=\\n?[ ]{0,3}:[ ])'             , // lookahead for following line feed
2559                                             //   with a definition mark.
2560        'mg'
2561    ), function(match, pre, terms_str) {
2562        // [portiong note] changed to list = $2 in order to reserve previously \n\n.
2563        var terms = self._php_trim(terms_str).split("\n");
2564        var text = '';
2565        for (var i = 0; i < terms.length; i++) {
2566            var term = terms[i];
2567            term = self.runSpanGamut(self._php_trim(term));
2568            text += "\n<dt>" + term + "</dt>";
2569        }
2570        return text + "\n";
2571    });
2572
2573    // Process actual definitions.
2574    list_str = list_str.replace(new RegExp(
2575        '\\n(\\n+)?'                       + // leading line = $1
2576        '('                                + // marker space = $2
2577            '[ ]{0,' + less_than_tab + '}' + // whitespace before colon
2578            '[:][ ]+'                      + // definition mark (colon)
2579        ')'                                +
2580        '([\\s\\S]+?)'                     + // definition text = $3
2581                                             // [porting note] Maybe no trailing
2582                                             // newlines in our version, changed the
2583                                             // following line from \n+ to \n*.
2584        '(?=\\n*'                          + // stop at next definition mark,
2585            '(?:'                          + // next term or end of text
2586                '\\n[ ]{0,' + less_than_tab + '}[:][ ]|' + // [porting note] do not match
2587                                                           // colon in the middle of a line
2588                '<dt>|\\x03'               + // \z
2589            ')'                            +
2590        ')',
2591        'mg'
2592    ), function(match, leading_line, marker_space, def) {
2593        if (leading_line || def.match(/\n{2,}/)) {
2594            // Replace marker with the appropriate whitespace indentation
2595            def = self._php_str_repeat(' ', marker_space.length) + def;
2596            def = self.runBlockGamut(self.outdent(def + "\n\n"));
2597            def = "\n" + def + "\n";
2598        }
2599        else {
2600            def = self._php_rtrim(def);
2601            def = self.runSpanGamut(self.outdent(def));
2602        }
2603
2604        return "\n<dd>"  + def + "</dd>\n";
2605    });
2606
2607    list_str = this.__unwrapSTXETX__(list_str);
2608
2609    return list_str;
2610};
2611
2612/**
2613 * Adding the fenced code block syntax to regular Markdown:
2614 *
2615 * ~~~
2616 * Code block
2617 * ~~~
2618 */
2619MarkdownExtra_Parser.prototype.doFencedCodeBlocks = function(text) {
2620    var self = this;
2621
2622    var less_than_tab = this.tab_width;
2623
2624    text = this.__wrapSTXETX__(text);
2625    text = text.replace(new RegExp(
2626		'(?:\\n|\\x02)'          +
2627        // 1: Opening marker
2628		'('                      +
2629            '~{3,}'              + // Marker: three tilde or more.
2630        ')'                      +
2631        '[ ]*\\n'                + // Whitespace and newline following marker.
2632
2633        // 2: Content
2634		'('                      +
2635			'(?:'                +
2636			'(?=('               +
2637                '(?!\\1[ ]*\\n)' + // Not a closing marker.
2638                '.*\\n+'         +
2639            '))\\3'              +
2640            ')+'                 +
2641		')'                      +
2642
2643        // Closing marker.
2644        '\\1[ ]*\\n',
2645        "mg"
2646    ), function(match, m1, codeblock) {
2647        codeblock = self._php_htmlspecialchars_ENT_NOQUOTES(codeblock);
2648        codeblock = codeblock.replace(/^\n+/, function(match) {
2649            return self._php_str_repeat("<br" + self.empty_element_suffix, match.length);
2650        });
2651        codeblock = "<pre><code>" + codeblock + "</code></pre>";
2652        return "\n\n" + self.hashBlock(codeblock) + "\n\n";
2653    });
2654    text = this.__unwrapSTXETX__(text);
2655
2656    return text;
2657};
2658
2659/**
2660 * Params:
2661 * $text - string to process with html <p> tags
2662 */
2663MarkdownExtra_Parser.prototype.formParagraphs = function(text) {
2664
2665    // Strip leading and trailing lines:
2666    text = this.__wrapSTXETX__(text);
2667    text = text.replace(/(?:\x02)\n+|\n+(?:\x03)/g, "");
2668    text = this.__unwrapSTXETX__(text);
2669
2670    var grafs = text.split(/\n{2,}/m);
2671    //preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
2672
2673    //
2674    // Wrap <p> tags and unhashify HTML blocks
2675    //
2676    for(var i = 0; i < grafs.length; i++) {
2677        var value = grafs[i];
2678        if(value == "") {
2679            // [porting note]
2680            // This case is replacement for PREG_SPLIT_NO_EMPTY.
2681            continue;
2682        }
2683        value = this._php_trim(this.runSpanGamut(value));
2684
2685        // Check if this should be enclosed in a paragraph.
2686        // Clean tag hashes & block tag hashes are left alone.
2687        var is_p = !value.match(/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/);
2688
2689        if (is_p) {
2690            value = "<p>" + value + "</p>";
2691        }
2692        grafs[i] = value;
2693    }
2694
2695    // Join grafs in one text, then unhash HTML tags.
2696    text = grafs.join("\n\n");
2697
2698    // Finish by removing any tag hashes still present in $text.
2699    text = this.unhash(text);
2700
2701    return text;
2702};
2703
2704// ### Footnotes
2705
2706/**
2707 * Strips link definitions from text, stores the URLs and titles in
2708 * hash references.
2709 */
2710MarkdownExtra_Parser.prototype.stripFootnotes = function(text) {
2711    var self = this;
2712
2713    var less_than_tab = this.tab_width - 1;
2714
2715    // Link defs are in the form: [^id]: url "optional title"
2716    text = text.replace(new RegExp(
2717        '^[ ]{0,' + less_than_tab + '}\\[\\^(.+?)\\][ ]?:' + // note_id = $1
2718          '[ ]*'                       +
2719          '\\n?'                       + // maybe *one* newline
2720        '('                            + // text = $2 (no blank lines allowed)
2721            '(?:'                      +
2722                '.+'                   + // actual text
2723            '|'                        +
2724                '\\n'                  + // newlines but
2725                '(?!\\[\\^.+?\\]:\\s)' + // negative lookahead for footnote marker.
2726                '(?!\\n+[ ]{0,3}\\S)'  + // ensure line is not blank and followed
2727                                         // by non-indented content
2728            ')*'                       +
2729        ')',
2730        "mg"
2731    ), function(match, m1, m2) {
2732        var note_id = self.fn_id_prefix + m1;
2733        self.footnotes[note_id] = self.outdent(m2);
2734        return ''; //# String that will replace the block
2735    });
2736    return text;
2737};
2738
2739/**
2740 * Replace footnote references in $text [^id] with a special text-token
2741 * which will be replaced by the actual footnote marker in appendFootnotes.
2742 */
2743MarkdownExtra_Parser.prototype.doFootnotes = function(text) {
2744    if (!this.in_anchor) {
2745        text = text.replace(/\[\^(.+?)\]/g, "F\x1Afn:$1\x1A:");
2746    }
2747    return text;
2748};
2749
2750/**
2751 * Append footnote list to text.
2752 */
2753MarkdownExtra_Parser.prototype.appendFootnotes = function(text) {
2754    var self = this;
2755
2756    var _appendFootnotes_callback = function(match, m1) {
2757        var node_id = self.fn_id_prefix + m1;
2758
2759        // Create footnote marker only if it has a corresponding footnote *and*
2760        // the footnote hasn't been used by another marker.
2761        if (node_id in self.footnotes) {
2762            // Transfert footnote content to the ordered list.
2763            self.footnotes_ordered.push([node_id, self.footnotes[node_id]]);
2764            delete self.footnotes[node_id];
2765
2766            var num = self.footnote_counter++;
2767            var attr = " rel=\"footnote\"";
2768            if (self.fn_link_class != "") {
2769                var classname = self.fn_link_class;
2770                classname = self.encodeAttribute(classname);
2771                attr += " class=\"" + classname + "\"";
2772            }
2773            if (self.fn_link_title != "") {
2774                var title = self.fn_link_title;
2775                title = self.encodeAttribute(title);
2776                attr += " title=\"" + title +"\"";
2777            }
2778
2779            attr = attr.replace(/%%/g, num);
2780            node_id = self.encodeAttribute(node_id);
2781
2782            return "<sup id=\"fnref:" + node_id + "\">" +
2783                "<a href=\"#fn:" + node_id + "\"" + attr + ">" + num + "</a>" +
2784                "</sup>";
2785        }
2786
2787        return "[^" + m1 + "]";
2788    };
2789
2790    text = text.replace(/F\x1Afn:(.*?)\x1A:/g, _appendFootnotes_callback);
2791
2792    if (this.footnotes_ordered.length > 0) {
2793        text += "\n\n";
2794        text += "<div class=\"footnotes\">\n";
2795        text += "<hr" + this.empty_element_suffix  + "\n";
2796        text += "<ol>\n\n";
2797
2798        var attr = " rev=\"footnote\"";
2799        if (this.fn_backlink_class != "") {
2800            var classname = this.fn_backlink_class;
2801            classname = this.encodeAttribute(classname);
2802            attr += " class=\"" + classname + "\"";
2803        }
2804        if (this.fn_backlink_title != "") {
2805            var title = this.fn_backlink_title;
2806            title = this.encodeAttribute(title);
2807            attr += " title=\"" + title + "\"";
2808        }
2809        var num = 0;
2810
2811        while (this.footnotes_ordered.length > 0) {
2812            var head = this.footnotes_ordered.shift();
2813            var note_id = head[0];
2814            var footnote = head[1];
2815
2816            footnote += "\n"; // Need to append newline before parsing.
2817            footnote = this.runBlockGamut(footnote + "\n");
2818            footnote = footnote.replace(/F\x1Afn:(.*?)\x1A:/g, _appendFootnotes_callback);
2819
2820            attr = attr.replace(/%%/g, ++num);
2821            note_id = this.encodeAttribute(note_id);
2822
2823            // Add backlink to last paragraph; create new paragraph if needed.
2824            var backlink = "<a href=\"#fnref:" + note_id + "\"" + attr + ">&#8617;</a>";
2825            if (footnote.match(/<\/p>$/)) {
2826                footnote = footnote.substr(0, footnote.length - 4) + "&#160;" + backlink + "</p>";
2827            } else {
2828                footnote += "\n\n<p>" + backlink + "</p>";
2829            }
2830
2831            text += "<li id=\"fn:" + note_id + "\">\n";
2832            text += footnote + "\n";
2833            text += "</li>\n\n";
2834        }
2835
2836        text += "</ol>\n";
2837        text += "</div>";
2838    }
2839    return text;
2840};
2841
2842//### Abbreviations ###
2843
2844/**
2845 * Strips abbreviations from text, stores titles in hash references.
2846 */
2847MarkdownExtra_Parser.prototype.stripAbbreviations = function(text) {
2848    var self = this;
2849
2850    var less_than_tab = this.tab_width - 1;
2851
2852    // Link defs are in the form: [id]*: url "optional title"
2853    text = text.replace(new RegExp(
2854        '^[ ]{0,' + less_than_tab + '}\\*\\[(.+?)\\][ ]?:' + // abbr_id = $1
2855        '(.*)',   // text = $2 (no blank lines allowed)
2856        "m"
2857    ), function(match, abbr_word, abbr_desc) {
2858        if (self.abbr_word_re != '') {
2859            self.abbr_word_re += '|';
2860        }
2861        self.abbr_word_re += self._php_preg_quote(abbr_word);
2862        self.abbr_desciptions[abbr_word] = self._php_trim(abbr_desc);
2863        return ''; // String that will replace the block
2864    });
2865    return text;
2866};
2867
2868/**
2869 * Find defined abbreviations in text and wrap them in <abbr> elements.
2870 */
2871MarkdownExtra_Parser.prototype.doAbbreviations = function(text) {
2872    var self = this;
2873
2874    if (this.abbr_word_re) {
2875        // cannot use the /x modifier because abbr_word_re may
2876        // contain significant spaces:
2877        text = text.replace(new RegExp(
2878            '(^|[^\\w\\x1A])'             +
2879            '(' + this.abbr_word_re + ')' +
2880            '(?![\\w\\x1A])'
2881        ), function(match, prev, abbr) {
2882            if (abbr in self.abbr_desciptions) {
2883                var desc = self.abbr_desciptions[abbr];
2884                if (!desc || desc == "") {
2885                    return self.hashPart("<abbr>" + abbr + "</abbr>");
2886                } else {
2887                    desc = self.encodeAttribute(desc);
2888                    return self.hashPart("<abbr title=\"" + desc + "\">" + abbr + "</abbr>");
2889                }
2890            } else {
2891                return match;
2892            }
2893        });
2894    }
2895    return text;
2896};
2897
2898
2899/**
2900 * Export to Node.js
2901 */
2902this.Markdown = Markdown;
2903this.Markdown_Parser = Markdown_Parser;
2904this.MarkdownExtra_Parser = MarkdownExtra_Parser;
2905
2906