/*!
 * Copyright (c) 2006 js-markdown-extra developers
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

var MARKDOWN_VERSION = "1.0.1o";
var MARKDOWNEXTRA_VERSION = "1.2.5";

// Global default settings:

/** Change to ">" for HTML output */
var MARKDOWN_EMPTY_ELEMENT_SUFFIX = " />";

/** Define the width of a tab for code blocks. */
var MARKDOWN_TAB_WIDTH = 4;

/** Optional title attribute for footnote links and backlinks. */
var MARKDOWN_FN_LINK_TITLE = "";
var MARKDOWN_FN_BACKLINK_TITLE = "";

/** Optional class attribute for footnote links and backlinks. */
var MARKDOWN_FN_LINK_CLASS = "";
var MARKDOWN_FN_BACKLINK_CLASS = "";

/** Change to false to remove Markdown from posts and/or comments. */
var MARKDOWN_WP_POSTS = true;
var MARKDOWN_WP_COMMENTS = true;

/**
 * Standard Function Interface.
 * Name of the constructor that Markdown() instantiates. Declared with `var`
 * like the other settings so that loading the file in strict mode (or as an
 * ES module) does not throw on an implicit global assignment.
 */
var MARKDOWN_PARSER_CLASS = 'MarkdownExtra_Parser';

/**
 * Converts Markdown formatted text to HTML.
 *
 * The parser instance is created on first use and cached on the function
 * object itself (`Markdown.parser`), so later calls reuse it.
 *
 * @param text Markdown text
 * @return HTML
 */
function Markdown(text) {
    // The cache lives on the function object. It is referenced by name
    // because `arguments.callee` throws a TypeError in strict mode.
    var parser = Markdown.parser;
    if ('undefined' == typeof parser) {
        // NOTE(security): the class is resolved with eval() so that it can
        // name a constructor defined later in this file. The value is a
        // module-level setting, not document input; it is still restricted
        // to a (dotted) identifier so nothing else can reach eval().
        if (!/^[A-Za-z_$][\w$]*(?:\.[A-Za-z_$][\w$]*)*$/.test(MARKDOWN_PARSER_CLASS)) {
            throw new Error('Invalid MARKDOWN_PARSER_CLASS: ' + MARKDOWN_PARSER_CLASS);
        }
        parser = eval("new " + MARKDOWN_PARSER_CLASS + "()");
        parser.init();
        Markdown.parser = parser;
    }
    // Transform text using parser.
    return parser.transform(text);
}

/**
 * Constructor function. Initialize appropriate member variables.
 *
 * Only declares the static configuration (escape characters, the three
 * "gamut" tables and the emphasis pattern lists); init() must be called
 * before the parser is used.
 */
function Markdown_Parser() {

    this.nested_brackets_depth = 6;
    this.nested_url_parenthesis_depth = 4;
    this.escape_chars = "\\\\`*_{}[]()>#+-.!";

    // Each gamut entry is [method name, priority]; init() sorts every table
    // by ascending priority.

    // Document transformations
    this.document_gamut = [
        // Strip link definitions, store in hashes.
        ['stripLinkDefinitions', 20],
        ['runBasicBlockGamut', 30]
    ];

    // These are all the transformations that form block-level
    // tags like paragraphs, headers, and list items.
    this.block_gamut = [
        ['doHeaders', 10],
        ['doHorizontalRules', 20],
        ['doLists', 40],
        ['doCodeBlocks', 50],
        ['doBlockQuotes', 60]
    ];

    // These are all the transformations that occur *within* block-level
    // tags like paragraphs, headers, and list items.
    this.span_gamut = [
        // Process character escapes, code spans, and inline HTML
        // in one shot.
        ['parseSpan', -30],
        // Process anchor and image tags. Images must come first,
        // because ![foo][f] looks like an anchor.
        ['doImages', 10],
        ['doAnchors', 20],
        // Make links out of things like `<http://example.com/>`
        // Must come after doAnchors, because you can use < and >
        // delimiters in inline links like [this](<url>).
        ['doAutoLinks', 30],
        ['encodeAmpsAndAngles', 40],
        ['doItalicsAndBold', 50],
        ['doHardBreaks', 60]
    ];

    // Emphasis patterns as [closing token, pattern source] pairs. The entry
    // with the empty token is the pattern used when no emphasis is open.
    this.em_relist = [
        ['' , '(?:(^|[^\\*])(\\*)(?=[^\\*])|(^|[^_])(_)(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'],
        ['*', '((?:\\S|^)[^\\*])(\\*)(?!\\*)'],
        ['_', '((?:\\S|^)[^_])(_)(?!_)']
    ];
    this.strong_relist = [
        ['' , '(?:(^|[^\\*])(\\*\\*)(?=[^\\*])|(^|[^_])(__)(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'],
        ['**', '((?:\\S|^)[^\\*])(\\*\\*)(?!\\*)'],
        ['__', '((?:\\S|^)[^_])(__)(?!_)']
    ];
    this.em_strong_relist = [
        ['' , '(?:(^|[^\\*])(\\*\\*\\*)(?=[^\\*])|(^|[^_])(___)(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'],
        ['***', '((?:\\S|^)[^\\*])(\\*\\*\\*)(?!\\*)'],
        ['___', '((?:\\S|^)[^_])(___)(?!_)']
    ];
}

/**
 * Finishes construction: prepares the emphasis patterns, builds the shared
 * pattern fragments, copies the global settings into the instance, sorts the
 * gamut tables and resets the per-document state.
 */
Markdown_Parser.prototype.init = function() {
    // this._initDetab(); // NOTE: JavaScript string length is already based on Unicode
    this.prepareItalicsAndBold();

    // Regex to match balanced [brackets].
    // The PHP original builds depth-limited patterns out of atomic groups
    // `(?>...)`. JavaScript's RegExp has no atomic groups, so those patterns
    // are kept here for reference only:
    //this.nested_brackets_re = new RegExp(
    //    str_repeat('(?>[^\\[\\]]+|\\[', this.nested_brackets_depth) +
    //    str_repeat('\\])*', this.nested_brackets_depth)
    //);
    //this.nested_url_parenthesis_re = new RegExp(
    //    str_repeat('(?>[^()\\s]+|\\(', this.nested_url_parenthesis_depth) +
    //    str_repeat('(?>\\)))*', this.nested_url_parenthesis_depth)
    //);

    // Simplified one-level replacements (pattern source strings).
    this.nested_brackets_re = '(?:\\[[^\\]]*\\]|[^\\[\\]]*)';
    // A URL never contains whitespace (as in the PHP pattern above). If the
    // second alternative accepted whitespace it would greedily swallow a
    // following `"title"`, and inline titles would never be captured.
    this.nested_url_parenthesis_re = '(?:\\([^\\)\\s]*\\)|[^\\(\\)\\s]*)';

    // Character class matching any backslash-escapable character:
    var tmp = [];
    for(var i = 0; i < this.escape_chars.length; i++) {
        tmp.push(this._php_preg_quote(this.escape_chars.charAt(i)));
    }
    this.escape_chars_re = '[' + tmp.join('') + ']';

    // Change to ">" for HTML output.
    this.empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
    this.tab_width = MARKDOWN_TAB_WIDTH;

    // Change to `true` to disallow markup or entities.
    this.no_markup = false;
    this.no_entities = false;

    // Predefined urls and titles for reference links and images.
    this.predef_urls = {};
    this.predef_titles = {};

    // Sort document, block, and span gamut in ascending priority order.
    function cmp_gamut(a, b) {
        a = a[1]; b = b[1];
        return a > b ? 1 : a < b ? -1 : 0;
    }
    this.document_gamut.sort(cmp_gamut);
    this.block_gamut.sort(cmp_gamut);
    this.span_gamut.sort(cmp_gamut);

    // Internal hashes used during transformation.
    this.urls = {};
    this.titles = {};
    this.html_hashes = {};

    // Status flag to avoid invalid nesting.
    this.in_anchor = false;
};
/**
 * [porting note]
 * The PHP original anchors patterns with \A and \Z, which JavaScript's
 * RegExp does not support, and multi-line patterns cannot otherwise tell the
 * start/end of the text from the start/end of a line. Instead the whole text
 * is wrapped with STX (\x02) and ETX (\x03) sentinels.
 *
 * @param text Text to wrap
 * @return Text starting with STX and ending with ETX (never doubled)
 */
Markdown_Parser.prototype.__wrapSTXETX__ = function(text) {
    if(text.charAt(0) != '\x02') { text = '\x02' + text; }
    if(text.charAt(text.length - 1) != '\x03') { text = text + '\x03'; }
    return text;
};

/**
 * [porting note]
 * Strip the STX (\x02) and ETX (\x03) sentinels added by __wrapSTXETX__.
 * Either sentinel may already be gone (a replacement may have consumed it).
 *
 * @param text Wrapped text
 * @return Text without a leading STX / trailing ETX
 */
Markdown_Parser.prototype.__unwrapSTXETX__ = function(text) {
    if(text.charAt(0) == '\x02') { text = text.slice(1); }
    if(text.charAt(text.length - 1) == '\x03') { text = text.slice(0, text.length - 1); }
    return text;
};

/**
 * Escapes regular expression metacharacters, in the spirit of PHP's
 * preg_quote(), so that `text` can be embedded in a pattern source string
 * (including inside a character class).
 *
 * Escaped characters: / . * + ? | ( ) [ ] { } \ ^ $ -
 *
 * @param text Literal text
 * @return Text with every metacharacter prefixed by a backslash
 */
Markdown_Parser.prototype._php_preg_quote = function(text) {
    // `}` and `\` are separate entries: the previous pattern ended in `\}\\`
    // (a missing `|`), so neither character was ever escaped on its own.
    // `-`, `^` and `$` are escaped too; an unescaped `-` turns `+-.` into a
    // range inside a character class.
    return text.replace(/[\/.*+?|()\[\]{}\\^$\-]/g, '\\$&');
};

/**
 * Port of PHP's str_repeat().
 *
 * @param str String to repeat
 * @param n   Number of repetitions
 * @return `str` repeated `n` times; the empty string when `n` is 0 or less
 */
Markdown_Parser.prototype._php_str_repeat = function(str, n) {
    var tmp = '';
    for(var i = 0; i < n; i++) {
        tmp += str;
    }
    return tmp;
};

/**
 * Port of PHP's trim(): strips the given characters from both ends.
 *
 * @param target   String to trim
 * @param charlist Characters to strip (default: space, tab, LF, CR). The
 *                 list is inserted verbatim into a character class, so the
 *                 caller must escape `]`, `\`, `^` and `-` if needed.
 * @return Trimmed string
 */
Markdown_Parser.prototype._php_trim = function(target, charlist) {
    var chars = charlist || " \t\n\r";
    return target.replace(
        new RegExp("^[" + chars + "]*|[" + chars + "]*$", "g"), ""
    );
};

/**
 * Port of PHP's rtrim(): strips the given characters from the end only.
 *
 * @param target   String to trim
 * @param charlist Characters to strip (default: space, tab, LF, CR);
 *                 inserted verbatim into a character class
 * @return Right-trimmed string
 */
Markdown_Parser.prototype._php_rtrim = function(target, charlist) {
    var chars = charlist || " \t\n\r";
    return target.replace(
        new RegExp( "[" + chars + "]*$", "g" ), ""
    );
};

/**
 * Port of PHP's htmlspecialchars($str, ENT_NOQUOTES): encodes `&`, `<` and
 * `>` as HTML entities and leaves quotes untouched.
 *
 * @param str Raw text
 * @return Text safe to place inside an HTML element
 */
Markdown_Parser.prototype._php_htmlspecialchars_ENT_NOQUOTES = function(str) {
    // `&` must be encoded first so the entities produced below are not
    // encoded a second time.
    return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
};


/**
 * Called before the transformation process starts to setup parser
 * states.
 */
Markdown_Parser.prototype.setup = function() {
    var hasOwn = Object.prototype.hasOwnProperty;
    var key;

    // Start from *copies* of the predefined maps. PHP assigns arrays by
    // value; assigning the objects directly would let link definitions found
    // in one document leak into predef_urls/predef_titles and therefore into
    // every later document.
    this.urls = {};
    for(key in this.predef_urls) {
        if(hasOwn.call(this.predef_urls, key)) {
            this.urls[key] = this.predef_urls[key];
        }
    }
    this.titles = {};
    for(key in this.predef_titles) {
        if(hasOwn.call(this.predef_titles, key)) {
            this.titles[key] = this.predef_titles[key];
        }
    }
    this.html_hashes = {};

    this.in_anchor = false;
};
/**
 * Called after the transformation process to clear any variable
 * which may be taking up memory unnecessarily.
 */
Markdown_Parser.prototype.teardown = function() {
    this.urls = {};
    this.titles = {};
    this.html_hashes = {};
};

/**
 * Main function. Performs some preprocessing on the input text
 * and passes it through the document gamut.
 *
 * @param text Markdown source
 * @return Transformed text followed by a newline
 */
Markdown_Parser.prototype.transform = function(text) {
    this.setup();

    // Remove a leading BOM (as a single code unit or as raw UTF-8 bytes) and
    // *every* \x1A marker character: \x1A delimits hash tokens, so none may
    // survive from the input.
    text = text.replace(/^(?:\uFEFF|\xEF\xBB\xBF)|\x1A/g, "");

    // Standardize line endings:
    // DOS to Unix and Mac to Unix
    text = text.replace(/\r\n?/g, "\n");

    // Make sure text ends with a couple of newlines:
    text += "\n\n";

    // Convert all tabs to spaces.
    text = this.detab(text);

    // Turn block-level HTML blocks into hash entries
    text = this.hashHTMLBlocks(text);

    // Strip any lines consisting only of spaces and tabs.
    // This makes subsequent regexen easier to write, because we can
    // match consecutive blank lines with /\n+/ instead of something
    // contorted like /[ ]*\n+/ . (Global flag: every such line, not just
    // the first one.)
    text = text.replace(/^[ ]+$/mg, "");

    // Run document gamut methods.
    for(var i = 0; i < this.document_gamut.length; i++) {
        var method = this[this.document_gamut[i][0]];
        if(method) {
            text = method.call(this, text);
        }
        else {
            console.log(this.document_gamut[i][0] + ' not implemented');
        }
    }

    this.teardown();

    return text + "\n";
};

/**
 * Replaces block-level HTML (and standalone <hr>, comments and <? ?> / <% %>
 * processing instructions) with hash tokens so that later passes leave the
 * markup untouched.
 *
 * @param text Document or block text
 * @return Text with each HTML block replaced by "\n\n" + token + "\n\n"
 */
Markdown_Parser.prototype.hashHTMLBlocks = function(text) {
    if(this.no_markup) { return text; }

    var less_than_tab = this.tab_width - 1;

    // Hashify HTML blocks:
    // We only want to do this for block-level HTML tags, such as headers,
    // lists, and tables. That's because we still want to wrap <p>s around
    // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    // phrase emphasis, and spans. The list of tags we're looking for is
    // hard-coded:
    //
    // *  List "a" is made of tags which can be both inline or block-level.
    //    These will be treated block-level when the start tag is alone on
    //    its line, otherwise they're not matched here and will be taken as
    //    inline later.
    // *  List "b" is made of tags which are always block-level;

    var block_tags_a_re = 'ins|del';
    var block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|' +
                          'script|noscript|form|fieldset|iframe|math';

    // Regular expression for the content of a block tag.
    var nested_tags_level = 4;
    var attr =
        '(?:' +                 // optional tag attributes
            '\\s' +             // starts with whitespace
            '(?:' +
                '[^>"/]+' +     // text outside quotes
            '|' +
                '/+(?!>)' +     // slash not followed by ">"
            '|' +
                '"[^"]*"' +     // text inside double quotes (tolerate ">")
            '|' +
                '\'[^\']*\'' +  // text inside single quotes (tolerate ">")
            ')*' +
        ')?';
    var content =
        this._php_str_repeat(
            '(?:' +
                '[^<]+' +       // content without tag
            '|' +
                '<\\2' +        // nested opening tag
                attr +          // attributes
                '(?:' +
                    '/>' +
                '|' +
                    '>',
            nested_tags_level
        ) +                     // end of opening tag
        '.*?' +                 // last level nested tag content
        this._php_str_repeat(
                    '</\\2\\s*>' +  // closing nested tag
                ')' +
            '|' +
                '<(?!/\\2\\s*>)' +  // other tags with a different name
            ')*',
            nested_tags_level
        );

    // Same pattern for the group-"a" branch, whose tag name is captured in
    // $3. Every `\2` must be rewritten; String.replace() with a string
    // pattern only rewrites the first one.
    var content2 = content.split('\\2').join('\\3');

    // First, look for nested blocks, e.g.:
    //  <div>
    //      <div>
    //      tags for inner block must be indented.
    //      </div>
    //  </div>
    //
    // The outermost tags must start at the left margin for this to match, and
    // the inner nested divs must be indented.
    // We need to do this before the next, more liberal match, because the next
    // match will start at the first `<div>` and stop at the first `</div>`.
    var all = new RegExp('(?:' +
        '(?:' +
            '(?:\\n\\n)' +          // Starting after a blank line
            '|' +                   // or
            '(?:\\x02)\\n?' +       // the beginning of the doc
        ')' +
        '(' +                       // save in $1

            // Match from `\n<tag>` to `</tag>\n`, handling nested tags
            // in between.
            '[ ]{0,' + less_than_tab + '}' +
            '<(' + block_tags_b_re + ')' +  // start tag = $2
            attr + '>' +                    // attributes followed by >
            content +                       // content, support nesting
            '</\\2>' +                      // the matching end tag
            '[ ]*' +                        // trailing spaces
            '(?=\\n+|\\n*\\x03)' +          // followed by a newline or end of document

        '|' + // Special version for tags of group a.

            '[ ]{0,' + less_than_tab + '}' +
            '<(' + block_tags_a_re + ')' +  // start tag = $3
            attr + '>[ ]*\\n' +             // attributes followed by > and a newline
            content2 +                      // content, support nesting
            '</\\3>' +                      // the matching end tag
            '[ ]*' +                        // trailing spaces
            '(?=\\n+|\\n*\\x03)' +          // followed by a newline or end of document

        '|' + // Special case just for <hr />. It was easier to make a special
              // case than to make the other regex more complicated.

            '[ ]{0,' + less_than_tab + '}' +
            '<(hr)' +                       // start tag = $4
            attr +                          // attributes
            '/?>' +                         // the matching end tag
            '[ ]*' +
            '(?=\\n{2,}|\\n*\\x03)' +       // followed by a blank line or end of document

        '|' + // Special case for standalone HTML comments:

            '[ ]{0,' + less_than_tab + '}' +
            '(?:' +
                '<!--.*?-->' +
            ')' +
            '[ ]*' +
            '(?=\\n{2,}|\\n*\\x03)' +       // followed by a blank line or end of document

        '|' + // PHP and ASP-style processor instructions (<? and <%)

            '[ ]{0,' + less_than_tab + '}' +
            '(?:' +
                '<([?%])' +                 // $5
                '.*?' +
                // Must close with the same character that opened it. JS
                // numbers groups across alternatives, so this is $5; a
                // back-reference to the unmatched $2 would match the empty
                // string and accept a bare ">".
                '\\5>' +
            ')' +
            '[ ]*' +
            '(?=\\n{2,}|\\n*\\x03)' +       // followed by a blank line or end of document

        ')' +
    ')', 'mig');

    var self = this;
    text = this.__wrapSTXETX__(text);
    text = text.replace(all, function(match, text) {
        var key = self.hashBlock(text);
        return "\n\n" + key + "\n\n";
    });
    text = this.__unwrapSTXETX__(text);
    return text;
};

/**
 * Called whenever a tag must be hashed when a function inserts an atomic
 * element in the text stream. Passing text through this function gives
 * a unique text-token which will be reverted back when calling unhash.
 *
 * The boundary argument specifies what character should be used to surround
 * the token. By convention, "B" is used for block elements that need not
 * be wrapped into paragraph tags at the end, ":" is used for elements
 * that are word separators and "X" is used in the general case.
 *
 * @param text     Markup to protect
 * @param boundary Boundary character (default "X")
 * @return Token that replaces the markup
 */
Markdown_Parser.prototype.hashPart = function hashPart(text, boundary) {
    if('undefined' === typeof boundary) {
        boundary = 'X';
    }
    // Swap back any tag hash found in text so we do not have to `unhash`
    // multiple times at the end.
    text = this.unhash(text);

    // Then hash the block. The counter is stored on the function object
    // (shared by all parser instances); it is reached through the function's
    // own name because `arguments.callee` is not available in strict mode.
    if('undefined' === typeof hashPart.i) {
        hashPart.i = 0;
    }
    var key = boundary + "\x1A" + (++hashPart.i) + boundary;
    this.html_hashes[key] = text;
    return key; // String that will replace the tag.
};

/**
 * Shortcut function for hashPart with block-level boundaries.
 *
 * @param text Markup to protect
 * @return Token that replaces the markup
 */
Markdown_Parser.prototype.hashBlock = function(text) {
    return this.hashPart(text, 'B');
};
/**
 * Strips link definitions from text, stores the URLs and titles in
 * hash references.
 *
 * Definitions are stored under the lower-cased id in this.urls and
 * this.titles (the title is `undefined` when the definition has none).
 *
 * @param text Document text
 * @return Text without the link definition lines
 */
Markdown_Parser.prototype.stripLinkDefinitions = function(text) {
    var less_than_tab = this.tab_width - 1;
    var self = this;
    // Link defs are in the form: ^[id]: url "optional title"
    text = this.__wrapSTXETX__(text);
    text = text.replace(new RegExp(
        // The STX sentinel sits between the start of the text and the first
        // line; it has to be consumed here, otherwise a definition on the
        // very first line can never match `^`.
        '^\\x02?' +
        '[ ]{0,' + less_than_tab + '}\\[(.+)\\][ ]?:' + // id = $1
            '[ ]*' +
            '\\n?' +                // maybe *one* newline
            '[ ]*' +
        '(?:' +
            '<(.+?)>' +             // url = $2
        '|' +
            '(\\S+?)' +             // url = $3
        ')' +
            '[ ]*' +
            '\\n?' +                // maybe one newline
            '[ ]*' +
        '(?:' +
            //'(?=\\s)' +           // lookbehind for whitespace
            '["\\(]' +
            '(.*?)' +               // title = $4
            '["\\)]' +
            '[ ]*' +
        ')?' +                      // title is optional
        '(?:\\n+|\\n*(?=\\x03))',
        'mg'), function(match, id, url2, url3, title) {
            var link_id = id.toLowerCase();
            var url = url2 ? url2 : url3;
            self.urls[link_id] = url;
            self.titles[link_id] = title;
            return ''; // String that will replace the block
        }
    );
    text = this.__unwrapSTXETX__(text);
    return text;
};

/**
 * Run block gamut transformations.
 *
 * @param text Block text
 * @return Transformed text
 */
Markdown_Parser.prototype.runBlockGamut = function(text) {
    // We need to escape raw HTML in Markdown source before doing anything
    // else. This needs to be done for each block, and not only at the
    // beginning in the Markdown function since hashed blocks can be part of
    // list items and could have been indented. Indented blocks would have
    // been seen as a code block in a previous pass of hashHTMLBlocks.
    text = this.hashHTMLBlocks(text);
    return this.runBasicBlockGamut(text);
};
/**
 * Applies every block gamut method in order and then forms paragraphs,
 * without hashing HTML blocks first. Use it when the HTML blocks are known
 * to be hashed already, as in the first whole-document pass.
 *
 * @param text Block text
 * @return Transformed text
 */
Markdown_Parser.prototype.runBasicBlockGamut = function(text) {
    var step = 0;
    while(step < this.block_gamut.length) {
        var name = this.block_gamut[step][0];
        var handler = this[name];
        if(!handler) {
            console.log(this.block_gamut[step][0] + ' not implemented');
        }
        else {
            text = handler.call(this, text);
        }
        step++;
    }
    // Finally form paragraphs and restore hashed blocks.
    return this.formParagraphs(text);
};

/**
 * Turns horizontal rule lines (three or more `-`, `*` or `_`, optionally
 * separated by up to two spaces) into hashed <hr> elements.
 *
 * @param text Block text
 * @return Text with each rule line replaced by "\n" + token + "\n"
 */
Markdown_Parser.prototype.doHorizontalRules = function(text) {
    var parser = this;
    var rule_re = new RegExp(
        '^[ ]{0,3}' +   // Leading space
        '([-\\*_])' +   // $1: First marker
        '(?:' +         // Repeated marker group
            '[ ]{0,2}' +    // Zero, one, or two spaces.
            '\\1' +         // Marker character
        '){2,}' +       // Group repeated at least twice
        '[ ]*' +        // Trailing spaces
        '$' ,           // End of line.
        'mg');
    var toRule = function() {
        var hr = "<hr" + parser.empty_element_suffix;
        return "\n" + parser.hashBlock(hr) + "\n";
    };
    return text.replace(rule_re, toRule);
};

/**
 * Applies every span gamut method in order.
 *
 * @param text Span-level text
 * @return Transformed text
 */
Markdown_Parser.prototype.runSpanGamut = function(text) {
    var step = 0;
    while(step < this.span_gamut.length) {
        var name = this.span_gamut[step][0];
        var handler = this[name];
        if(!handler) {
            console.log(this.span_gamut[step][0] + ' not implemented');
        }
        else {
            text = handler.call(this, text);
        }
        step++;
    }
    return text;
};

/**
 * Turns two or more spaces at the end of a line into a hashed <br> element.
 *
 * @param text Span-level text
 * @return Text with hard breaks replaced by tokens
 */
Markdown_Parser.prototype.doHardBreaks = function(text) {
    var parser = this;
    var toBreak = function() {
        var br = "<br" + parser.empty_element_suffix + "\n";
        return parser.hashPart(br);
    };
    return text.replace(/ {2,}\n/mg, toBreak);
};
/**
 * Turn Markdown link shortcuts into XHTML <a> tags.
 *
 * Handles, in this order: reference links `[text][id]`, inline links
 * `[text](url "title")` and shortcut references `[text]`. Does nothing when
 * called while an anchor is already being built (links cannot nest).
 *
 * @param text Span-level text
 * @return Text with links replaced by hash tokens
 */
Markdown_Parser.prototype.doAnchors = function(text) {
    if (this.in_anchor) { return text; }
    this.in_anchor = true;

    var self = this;
    var hasOwn = Object.prototype.hasOwnProperty;

    // Looks up a link definition. Only own properties count, so ids such as
    // "constructor" do not resolve to members of Object.prototype.
    var definition = function(map, key) {
        return hasOwn.call(map, key) ? map[key] : undefined;
    };

    var _doAnchors_reference_callback = function(match, whole_match, link_text, link_id) {
        // For the shortcut pattern the fourth argument is the match offset
        // (a number), hence the type check.
        if(typeof(link_id) !== 'string' || link_id === '') {
            // for shortcut links like [this][] or [this].
            link_id = link_text;
        }

        // lower-case and turn embedded newlines into spaces
        link_id = link_id.toLowerCase();
        link_id = link_id.replace(/[ ]?\n/g, ' ');

        var url = definition(self.urls, link_id);
        if ('undefined' === typeof url) {
            // No such link id: leave the text intact.
            return whole_match;
        }
        url = self.encodeAttribute(url);

        var result = "<a href=\"" + url + "\"";
        var title = definition(self.titles, link_id);
        if ('undefined' !== typeof title) {
            title = self.encodeAttribute(title);
            result += " title=\"" + title + "\"";
        }

        link_text = self.runSpanGamut(link_text);
        result += ">" + link_text + "</a>";
        return self.hashPart(result);
    };

    //
    // First, handle reference-style links: [link text] [id]
    //
    // [porting note] The "cheat text" keeps only brackets and whitespace and
    // is a cheap check of whether the full regex can match at all. Running
    // the full regex on every text can be very slow because of backtracking.
    // Whitespace-only bracket pairs are collapsed to `[]` so that link text
    // containing spaces is still recognised.
    var refCheat = text.replace(/[^\[^\]^\n^\s]/gm, '').replace(/\[\s*\]/g, '[]');
    if (/\[\][ ]?(?:\n[ ]*)?\[\]/.test(refCheat)) {
        text = text.replace(new RegExp(
            '(' +               // wrap whole match in $1
                '\\[' +
                    '(' + this.nested_brackets_re + ')' +   // link text = $2
                '\\]' +

                '[ ]?' +        // one optional space
                '(?:\\n[ ]*)?' +    // one optional newline followed by spaces

                '\\[' +
                    '(.*?)' +   // id = $3
                '\\]' +
            ')',
            'mg'
        ), _doAnchors_reference_callback);
    }

    //
    // Next, inline-style links: [link text](url "optional title")
    //
    // [porting note] Same cheap pre-check as above. The original heuristic
    // (collapse the first parenthesised group only) is kept, and a second
    // form collapses every whitespace-only group so that a titled link which
    // is not the first parenthesised text is found as well.
    var inlineStripped = text.replace(/[^\(^\)^\[^\]^\s]/gm, '');
    var cheatText = inlineStripped.replace(/\(.*?\)/, '()');
    var cheatTextAll = inlineStripped.replace(/\(\s*\)/g, '()');
    if ((cheatText.indexOf("]()") !== -1) || (cheatText.indexOf("](\"\")") !== -1) ||
        (cheatTextAll.indexOf("]()") !== -1)) {
        text = text.replace(new RegExp(
            '(' +               // wrap whole match in $1
                '\\[' +
                    '(' + this.nested_brackets_re + ')' +   // link text = $2
                '\\]' +
                '\\(' +         // literal paren
                    '[ \\n]*' +
                    '(?:' +
                        '<(.+?)>' + // href = $3
                    '|' +
                        '(' + this.nested_url_parenthesis_re + ')' +    // href = $4
                    ')' +
                    '[ \\n]*' +
                    '(' +       // $5
                        '([\'"])' + // quote char = $6
                        '(.*?)' +   // Title = $7
                        '\\6' +     // matching quote
                        '[ \\n]*' + // ignore any spaces/tabs between closing quote and )
                    ')?' +      // title is optional
                '\\)' +
            ')',
            'mg'
        ), function(match, whole_match, link_text, url3, url4, x0, x1, title) {
            var url = url3 ? url3 : url4;

            url = self.encodeAttribute(url);

            var result = "<a href=\"" + url + "\"";
            if ('undefined' !== typeof title && title !== '') {
                title = self.encodeAttribute(title);
                result += " title=\"" + title + "\"";
            }

            // The span gamut is run exactly once on the link text.
            link_text = self.runSpanGamut(link_text);
            result += ">" + link_text + "</a>";

            return self.hashPart(result);
        });
    }


    //
    // Last, handle reference-style shortcuts: [link text]
    // These must come last in case you've also got [link text][1]
    // or [link text](/foo)
    //
    text = text.replace(new RegExp(
        '(' +                   // wrap whole match in $1
            '\\[' +
                '([^\\[\\]]+)' +    // link text = $2; can\'t contain [ or ]
            '\\]' +
        ')',
        'mg'
    ), _doAnchors_reference_callback);

    this.in_anchor = false;
    return text;
};

/**
 * Turn Markdown image shortcuts into <img> tags.
 *
 * Handles reference images `![alt][id]` and inline images
 * `![alt](url "title")`.
 *
 * @param text Span-level text
 * @return Text with images replaced by hash tokens
 */
Markdown_Parser.prototype.doImages = function(text) {
    var self = this;
    var hasOwn = Object.prototype.hasOwnProperty;
    // Declared locally: assigning an undeclared `cheatText` creates a global
    // and throws in strict mode.
    var cheatText;

    // Own-property lookup of a link definition (see doAnchors).
    var definition = function(map, key) {
        return hasOwn.call(map, key) ? map[key] : undefined;
    };

    //
    // First, handle reference-style labeled images: ![alt text][id]
    //
    cheatText = text.replace(/[^!^\[^\]^\n^\s]/gm, '').replace(/\[\s*\]/g, '[]');
    if ((cheatText.indexOf('![][]') !== -1) || (cheatText.indexOf('![] []') !== -1) || (cheatText.indexOf('![]\n[]') !== -1)) {
        text = text.replace(new RegExp(
            '(' +               // wrap whole match in $1
                '!\\[' +
                    '(' + this.nested_brackets_re + ')' +   // alt text = $2
                '\\]' +

                '[ ]?' +        // one optional space
                '(?:\\n[ ]*)?' +    // one optional newline followed by spaces

                '\\[' +
                    '(.*?)' +   // id = $3
                '\\]' +

            ')',
            'mg'
        ), function(match, whole_match, alt_text, link_id) {
            link_id = link_id.toLowerCase();

            if (typeof(link_id) !== 'string' || link_id === '') {
                link_id = alt_text.toLowerCase(); // for shortcut links like ![this][].
            }

            alt_text = self.encodeAttribute(alt_text);
            var url = definition(self.urls, link_id);
            if ('undefined' === typeof url) {
                // If there's no such link ID, leave intact:
                return whole_match;
            }

            url = self.encodeAttribute(url);
            var result = "<img src=\"" + url + "\" alt=\"" + alt_text + "\"";
            var title = definition(self.titles, link_id);
            if ('undefined' !== typeof title) {
                title = self.encodeAttribute(title);
                result += " title=\"" + title + "\"";
            }
            result += self.empty_element_suffix;
            return self.hashPart(result);
        });
    }

    //
    // Next, handle inline images: ![alt text](url "optional title")
    // Don't forget: encode * and _
    //
    // Whitespace-only parentheses are collapsed as well, otherwise the space
    // between the url and the title hides the `]()` marker and titled images
    // are never converted.
    cheatText = text.replace(/[^!^\(^\)^\[^\]^\n^\s]/gm, '')
        .replace(/\[\s*\]/g, '[]')
        .replace(/\(\s*\)/g, '()');
    if ((cheatText.indexOf(']()') !== -1) || (cheatText.indexOf('] ()') !== -1) || (cheatText.indexOf(']\n()') !== -1)) {
        text = text.replace(new RegExp(
            '(' +               // wrap whole match in $1
                '!\\[' +
                    '(' + this.nested_brackets_re + ')' +   // alt text = $2
                '\\]' +
                '\\s?' +        // One optional whitespace character
                '\\(' +         // literal paren
                    '[ \\n]*' +
                    '(?:' +
                        '<(\\S*)>' +    // src url = $3
                    '|' +
                        '(' + this.nested_url_parenthesis_re + ')' +    // src url = $4
                    ')' +
                    '[ \\n]*' +
                    '(' +       // $5
                        '([\'"])' + // quote char = $6
                        '(.*?)' +   // title = $7
                        '\\6' +     // matching quote
                        '[ \\n]*' +
                    ')?' +      // title is optional
                '\\)' +
            ')',
            'mg'
        ), function(match, whole_match, alt_text, url3, url4, x5, x6, title) {
            var url = url3 ? url3 : url4;

            alt_text = self.encodeAttribute(alt_text);
            url = self.encodeAttribute(url);
            var result = "<img src=\"" + url + "\" alt=\"" + alt_text + "\"";
            if ('undefined' !== typeof title && title !== '') {
                title = self.encodeAttribute(title);
                result += " title=\"" + title + "\"";
            }
            result += self.empty_element_suffix;

            return self.hashPart(result);
        });
    }

    return text;
};

/**
 * Turns Setext-style (underlined) and atx-style (`#`-prefixed) headers into
 * hashed <h1>..<h6> elements.
 *
 * @param text Block text
 * @return Text with headers replaced by "\n" + token + "\n\n"
 */
Markdown_Parser.prototype.doHeaders = function(text) {
    var self = this;
    // Setext-style headers:
    //    Header 1
    //    ========
    //
    //    Header 2
    //    --------
    //
    text = text.replace(/^(.+?)[ ]*\n(=+|-+)[ ]*\n+/mg, function(match, span, line) {
        // Terrible hack to check we haven't found an empty list item.
        if(line == '-' && span.match(/^-(?: |$)/)) {
            return match;
        }
        var level = line.charAt(0) == '=' ? 1 : 2;
        var block = "<h" + level + ">" + self.runSpanGamut(span) + "</h" + level + ">";
        return "\n" + self.hashBlock(block) + "\n\n";
    });

    // atx-style headers:
    //  # Header 1
    //  ## Header 2
    //  ## Header 2 with closing hashes ##
    //  ...
    //  ###### Header 6
    //
    text = text.replace(new RegExp(
        '^(\\#{1,6})' + // $1 = string of #\'s
        '[ ]*' +
        '(.+?)' +       // $2 = Header text
        '[ ]*' +
        '\\#*' +        // optional closing #\'s (not counted)
        '\\n+',
        'mg'
    ), function(match, hashes, span) {
        var level = hashes.length;
        var block = "<h" + level + ">" + self.runSpanGamut(span) + "</h" + level + ">";
        return "\n" + self.hashBlock(block) + "\n\n";
    });

    return text;
};
/**
 * Convert Markdown bullet lists and numbered lists found in `text` into
 * `<ul>` / `<ol>` markup.
 *
 * The text is scanned twice, once per marker family (bullets, then
 * numbers). Each whole list is handed to processListItems() and the
 * resulting element is stored through hashBlock().
 *
 * @param text Markdown text
 * @return text with every list replaced by the value hashBlock() returned
 */
Markdown_Parser.prototype.doLists = function(text) {
    var less_than_tab = this.tab_width - 1;

    // Re-usable patterns to match list item bullets and number markers:
    var marker_ul_re = '[\\*\\+-]';
    var marker_ol_re = '\\d+[\\.]';

    var self = this;
    var _doLists_callback = function(match, list, x2, x3, type) {
        // The first item marker ($4) decides the list type; only markers
        // of that family are then accepted as item separators.
        var list_type = type.match(marker_ul_re) ? "ul" : "ol";
        var marker_any_re = list_type == "ul" ? marker_ul_re : marker_ol_re;

        list += "\n";
        var result = self.processListItems(list, marker_any_re);

        result = self.hashBlock("<" + list_type + ">\n" + result + "</" + list_type + ">");
        return "\n" + result + "\n\n";
    };

    var markers_relist = [
        [marker_ul_re, marker_ol_re],
        [marker_ol_re, marker_ul_re]
    ];

    for (var i = 0; i < markers_relist.length; i++) {
        var marker_re = markers_relist[i][0];
        var other_marker_re = markers_relist[i][1];

        // Re-usable pattern to match any entire ul or ol list:
        var whole_list_re =
            '(' +                                       // $1 = whole list
                '(' +                                   // $2
                    '([ ]{0,' + less_than_tab + '})' +  // $3 = number of spaces
                    '(' + marker_re + ')' +             // $4 = first list item marker
                    '[ ]+' +
                ')' +
                '[\\s\\S]+?' +
                '(' +                                   // $5
                    '(?=\\x03)' +                       // \z
                '|' +
                    '\\n{2,}' +
                    '(?=\\S)' +
                    '(?!' +                             // Negative lookahead for another list item marker
                        '[ ]*' +
                        marker_re + '[ ]+' +
                    ')' +
                '|' +
                    '(?=' +                             // Lookahead for another kind of list
                        '\\n' +
                        '\\3' +                         // Must have the same indentation
                        other_marker_re + '[ ]+' +
                    ')' +
                ')' +
            ')'; // mx

        // We use a different prefix before nested lists than top-level lists.
        // See extended comment in processListItems().
        text = this.__wrapSTXETX__(text);
        if (this.list_level) {
            text = text.replace(new RegExp('^' + whole_list_re, "mg"), _doLists_callback);
        }
        else {
            text = text.replace(new RegExp(
                '(?:(?=\\n)\\n|\\x02\\n?)' + // Must eat the newline
                whole_list_re, "mg"
            ), _doLists_callback);
        }
        text = this.__unwrapSTXETX__(text);
    }

    return text;
};

/**
 * Process the contents of a single ordered or unordered list, splitting it
 * into individual list items.
 *
 * @param list_str      text of one whole list
 * @param marker_any_re regex source matching the item markers of this list
 * @return the items wrapped in `<li>` elements, one per line
 */
Markdown_Parser.prototype.processListItems = function(list_str, marker_any_re) {
    // this.list_level keeps track of when we're inside a list.
    // Each time we enter a list, we increment it; when we leave a list,
    // we decrement. If it's zero, we're not in a list anymore.
    //
    // We do this because when we're not inside a list, we want to treat
    // something like this:
    //
    //     I recommend upgrading to version
    //     8. Oops, now this line is treated
    //     as a sub-list.
    //
    // As a single paragraph, despite the fact that the second line starts
    // with a digit-period-space sequence.
    //
    // Whereas when we're inside a list (or sub-list), that line will be
    // treated as the start of a sub-list. What a kludge, huh? This is
    // an aspect of Markdown's syntax that's hard to parse perfectly
    // without resorting to mind-reading. Perhaps the solution is to
    // change the syntax rules such that sub-lists must start with a
    // starting cardinal number; e.g. "1." or "a.".

    if ('undefined' === typeof this.list_level) {
        this.list_level = 0;
    }
    this.list_level++;

    // The counter is restored in `finally` so that an exception thrown by a
    // nested transformation cannot leave the parser stuck in
    // "inside a list" mode for later calls.
    try {
        // trim trailing blank lines:
        list_str = this.__wrapSTXETX__(list_str);
        list_str = list_str.replace(/\n{2,}(?=\x03)/m, "\n");
        list_str = this.__unwrapSTXETX__(list_str);

        var self = this;
        list_str = this.__wrapSTXETX__(list_str);
        list_str = list_str.replace(new RegExp(
            '(\\n)?' +                      // leading line = $1
            '([ ]*)' +                      // leading whitespace = $2
            '(' + marker_any_re +           // list marker and space = $3
                '(?:[ ]+|(?=\\n))' +        // space only required if item is not empty
            ')' +
            '([\\s\\S]*?)' +                // list item text = $4
            '(?:(\\n+(?=\\n))|\\n)' +       // tailing blank line = $5
            '(?=\\n*(\\x03|\\2(' + marker_any_re + ')(?:[ ]+|(?=\\n))))',
            "gm"
        ), function(match, leading_line, leading_space, marker_space, item, tailing_blank_line) {
            if (leading_line || tailing_blank_line || item.match(/\n{2,}/)) {
                // Replace marker with the appropriate whitespace indentation
                item = leading_space + self._php_str_repeat(' ', marker_space.length) + item;
                item = self.runBlockGamut(self.outdent(item) + "\n");
            }
            else {
                // Recursion for sub-lists:
                item = self.doLists(self.outdent(item));
                item = item.replace(/\n+$/m, '');
                item = self.runSpanGamut(item);
            }

            return "<li>" + item + "</li>\n";
        });
        list_str = this.__unwrapSTXETX__(list_str);

        return list_str;
    }
    finally {
        this.list_level--;
    }
};

/**
 * Process Markdown `<pre><code>` blocks.
 *
 * @param text Markdown text
 * @return text with indented code blocks replaced by hashed
 *         `<pre><code>` elements
 */
Markdown_Parser.prototype.doCodeBlocks = function(text) {
    var self = this;
    text = this.__wrapSTXETX__(text);
    text = text.replace(new RegExp(
        '(?:^|\\n\\n|(?=\\x02)\\n)?' +
        '(' +                                       // $1 = the code block -- one or more lines, starting with a space/tab
            '(?:' +
                '(?=(' +                            // lookahead + back-reference emulates an atomic group
                    '[ ]{' + this.tab_width + ',}' + // Lines must start with a tab or a tab-width of spaces
                    '.*\\n+' +
                '))\\2' +
            ')+' +
        ')' +
        '((?=^[ ]{0,' + this.tab_width + '}\\S)|(?:\\n*(?=\\x03)))', // Lookahead for non-space at line-start, or end of doc
        'mg'
    ), function(match, codeblock) {
        codeblock = self.outdent(codeblock);
        codeblock = self._php_htmlspecialchars_ENT_NOQUOTES(codeblock);

        // Trim leading and trailing newlines. Anchoring on the string
        // itself (no `m` flag) is the equivalent of PCRE's \A and \z; the
        // previous `(?=\x02)\n+` alternative could never match, so leading
        // newlines were left in place.
        codeblock = codeblock.replace(/^\n+|\n+$/g, '');

        codeblock = "<pre><code>" + codeblock + "\n</code></pre>";
        return "\n\n" + self.hashBlock(codeblock) + "\n\n";
    });
    text = this.__unwrapSTXETX__(text);
    return text;
};

/**
 * Create a code span markup for `code`. Called from handleSpanToken.
 *
 * @param code raw text found between the backtick markers
 * @return the value hashPart() returned for the `<code>` element
 */
Markdown_Parser.prototype.makeCodeSpan = function(code) {
    code = this._php_htmlspecialchars_ENT_NOQUOTES(this._php_trim(code));
    return this.hashPart("<code>" + code + "</code>");
};

/**
 * Prepare regular expressions for searching emphasis tokens in any
 * context.
 *
 * For every (em, strong) pair from `em_relist` x `strong_relist` one
 * RegExp is stored in `em_strong_prepared_relist` under the key
 * `'rx_' + em + strong`.
 */
Markdown_Parser.prototype.prepareItalicsAndBold = function() {
    this.em_strong_prepared_relist = {};
    for (var i = 0; i < this.em_relist.length; i++) {
        var em = this.em_relist[i][0];
        var em_re = this.em_relist[i][1];
        for (var j = 0; j < this.strong_relist.length; j++) {
            var strong = this.strong_relist[j][0];
            var strong_re = this.strong_relist[j][1];

            // Construct list of allowed token expressions.
            var token_relist = [];
            for (var k = 0; k < this.em_strong_relist.length; k++) {
                var em_strong = this.em_strong_relist[k][0];
                var em_strong_re = this.em_strong_relist[k][1];
                if (em + strong == em_strong) {
                    token_relist.push(em_strong_re);
                }
            }
            token_relist.push(em_re);
            token_relist.push(strong_re);

            // Construct master expression from list.
            var token_re = new RegExp('(' + token_relist.join('|') + ')');
            this.em_strong_prepared_relist['rx_' + em + strong] = token_re;
        }
    }
};

/**
 * Turn emphasis markers (`*`, `_`, `**`, `__`, `***`, `___`) into
 * `<em>` / `<strong>` elements.
 *
 * Works as a small stack machine: `token_stack` holds the currently open
 * markers and `text_stack` the text collected since each one was opened
 * (index 0 is the innermost). Requires prepareItalicsAndBold() to have
 * been called.
 *
 * @param text span-level text
 * @return text with emphasis replaced by hashed elements
 */
Markdown_Parser.prototype.doItalicsAndBold = function(text) {
    var em = '';
    var strong = '';
    var tree_char_em = false;
    var text_stack = [''];
    var token_stack = [];
    var token = '';
    var tag, span, unwound;

    while (true) {
        //
        // Get prepared regular expression for searching emphasis tokens
        // in current context.
        //
        var token_re = this.em_strong_prepared_relist['rx_' + em + strong];

        //
        // Each loop iteration searches for the next emphasis token.
        //
        var parts = text.match(token_re); //PREG_SPLIT_DELIM_CAPTURE
        if (parts) {
            // Derived from the match itself rather than from the legacy
            // RegExp.leftContext / RegExp.rightContext statics.
            var left = text.substring(0, parts.index);
            var right = text.substring(parts.index + parts[0].length);
            var pre = "";
            var marker = parts[1];
            // Sub-groups come in (prefix, marker) pairs; use the first
            // pair whose prefix is non-empty. When none is, the whole
            // match is the marker.
            for (var mg = 2; mg < parts.length; mg += 2) {
                if ('undefined' !== typeof parts[mg] && parts[mg] != '') {
                    pre = parts[mg];
                    marker = parts[mg + 1];
                    break;
                }
            }
            text_stack[0] += (left + pre);
            token = marker;
            text = right;
        }
        else {
            text_stack[0] += text;
            token = '';
            text = '';
        }

        if (token == '') {
            // Reached end of text span: empty stack without emitting
            // any more emphasis.
            while (token_stack.length > 0 && token_stack[0].length > 0) {
                text_stack[1] += token_stack.shift();
                unwound = text_stack.shift(); // $text_stack[0] .= array_shift($text_stack);
                text_stack[0] += unwound;
            }
            break;
        }

        var token_len = token.length;
        if (tree_char_em) {
            // Reached closing marker while inside a three-char emphasis.
            if (token_len == 3) {
                // Three-char closing marker, close em and strong.
                token_stack.shift();
                span = text_stack.shift();
                span = this.runSpanGamut(span);
                span = "<strong><em>" + span + "</em></strong>";
                text_stack[0] += this.hashPart(span);
                em = '';
                strong = '';
            } else {
                // Other closing marker: close one em or strong and
                // change current token state to match the other
                token_stack[0] = this._php_str_repeat(token.charAt(0), 3 - token_len);
                tag = token_len == 2 ? "strong" : "em";
                span = text_stack[0];
                span = this.runSpanGamut(span);
                span = "<" + tag + ">" + span + "</" + tag + ">";
                // Plain assignment is correct here: `span` was read from
                // text_stack[0] without shifting it off.
                text_stack[0] = this.hashPart(span);
                if (tag == 'strong') { strong = ''; } else { em = ''; }
            }
            tree_char_em = false;
        } else if (token_len == 3) {
            if (em != '') {
                // Reached closing marker for both em and strong.
                for (var i = 0; i < 2; ++i) {
                    var shifted_token = token_stack.shift();
                    tag = shifted_token.length == 2 ? "strong" : "em";
                    span = text_stack.shift();
                    span = this.runSpanGamut(span);
                    span = "<" + tag + ">" + span + "</" + tag + ">";
                    // Append: after the shift, text_stack[0] is the text
                    // of the enclosing level and must be kept.
                    text_stack[0] += this.hashPart(span);
                    if (tag == 'strong') { strong = ''; } else { em = ''; }
                }
            } else {
                // Reached opening three-char emphasis marker. Push on token
                // stack; will be handled by the special condition above.
                em = token.charAt(0);
                strong = em + em;
                token_stack.unshift(token);
                text_stack.unshift('');
                tree_char_em = true;
            }
        } else if (token_len == 2) {
            if (strong != '') {
                // Unwind any dangling emphasis marker:
                if (token_stack[0].length == 1) {
                    text_stack[1] += token_stack.shift();
                    unwound = text_stack.shift(); // $text_stack[0] .= array_shift($text_stack);
                    text_stack[0] += unwound;
                }
                // Closing strong marker:
                token_stack.shift();
                span = text_stack.shift();
                span = this.runSpanGamut(span);
                span = "<strong>" + span + "</strong>";
                text_stack[0] += this.hashPart(span);
                strong = '';
            } else {
                token_stack.unshift(token);
                text_stack.unshift('');
                strong = token;
            }
        } else {
            // Here token_len == 1
            if (em != '') {
                if (token_stack[0].length == 1) {
                    // Closing emphasis marker:
                    token_stack.shift();
                    span = text_stack.shift();
                    span = this.runSpanGamut(span);
                    span = "<em>" + span + "</em>";
                    text_stack[0] += this.hashPart(span);
                    em = '';
                } else {
                    text_stack[0] += token;
                }
            } else {
                token_stack.unshift(token);
                text_stack.unshift('');
                em = token;
            }
        }
    }
    return text_stack[0];
};

/**
 * Convert `>`-prefixed blocks into hashed `<blockquote>` elements. The
 * quoted content is run through runBlockGamut() again, so block quotes
 * may nest.
 *
 * @param text Markdown text
 * @return text with block quotes replaced
 */
Markdown_Parser.prototype.doBlockQuotes = function(text) {
    var self = this;
    text = text.replace(new RegExp(
        '(' +                   // Wrap whole match in $1
            '(?:' +
                '^[ ]*>[ ]?' +  // ">" at the start of a line
                '.+\\n' +       // rest of the first line
                '(.+\\n)*' +    // subsequent consecutive lines
                '\\n*' +        // blanks
            ')+' +
        ')',
        'mg'
    ), function(match, bq) {
        // trim one level of quoting - trim whitespace-only lines
        bq = bq.replace(/^[ ]*>[ ]?|^[ ]+$/mg, '');
        bq = self.runBlockGamut(bq); // recurse

        bq = bq.replace(/^/mg, " ");
        // These leading spaces cause problem with <pre> content,
        // so we need to fix that. (The pattern used to be double-escaped
        // inside a regex literal and therefore never matched; the
        // un-indent also has to be global to undo the indent on every
        // line of the <pre> block.)
        bq = bq.replace(/(\s*<pre>[\s\S]+?<\/pre>)/mg, function(pre_match, pre) {
            return pre.replace(/^ /mg, '');
        });

        return "\n" + self.hashBlock("<blockquote>\n" + bq + "\n</blockquote>") + "\n\n";
    });
    return text;
};

/**
 * Wrap paragraphs in `<p>` tags and swap hashed blocks back in.
 *
 * @param text string to process with html <p> tags
 * @return the paragraphs and blocks joined by blank lines
 */
Markdown_Parser.prototype.formParagraphs = function(text) {

    // Strip leading and trailing lines:
    text = this.__wrapSTXETX__(text);
    text = text.replace(/(?:\x02)\n+|\n+(?:\x03)/g, "");
    text = this.__unwrapSTXETX__(text);

    var grafs = text.split(/\n{2,}/m);

    //
    // Wrap <p> tags and unhashify HTML blocks
    //
    for (var i = 0; i < grafs.length; i++) {
        var value = grafs[i];
        if (value == "") {
            // [porting note]
            // This case is replacement for PREG_SPLIT_NO_EMPTY.
        }
        else if (!value.match(/^B\x1A[0-9]+B$/)) {
            // Is a paragraph.
            value = this.runSpanGamut(value);
            value = value.replace(/^([ ]*)/, "<p>");
            value += "</p>";
            grafs[i] = this.unhash(value);
        }
        else {
            // Is a block: replace the hash key by the stored block.
            // [porting note] The PHP original carries a commented-out
            // special case for <div markdown="1"> here; it is not ported.
            grafs[i] = this.html_hashes[value];
        }
    }

    return grafs.join("\n\n");
};

/**
 * Encode text for a double-quoted HTML attribute. This function
 * is *not* suitable for attributes enclosed in single quotes.
 *
 * @param text raw attribute value
 * @return value with ampersands and angle brackets encoded by
 *         encodeAmpsAndAngles() and double quotes written as `&quot;`
 */
Markdown_Parser.prototype.encodeAttribute = function(text) {
    text = this.encodeAmpsAndAngles(text);
    text = text.replace(/"/g, '&quot;');
    return text;
};
/**
 * Smart processing for ampersands and angle brackets that need to
 * be encoded. Valid character entities are left alone unless the
 * no-entities mode is set.
 *
 * @param text raw text
 * @return text with `&` written as `&amp;` (every `&` in no-entities
 *         mode, otherwise only those that do not start an entity) and
 *         every `<` written as `&lt;`
 */
Markdown_Parser.prototype.encodeAmpsAndAngles = function(text) {
    if (this.no_entities) {
        text = text.replace(/&/g, '&amp;');
    } else {
        // Ampersand-encoding based entirely on Nat Irons's Amputator
        // MT plugin: <http://bumppo.net/projects/amputator/>
        // The `g` flag is required: String.prototype.replace only touches
        // the first match otherwise.
        text = text.replace(/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g, '&amp;');
    }
    // Encode remaining <'s
    text = text.replace(/</g, '&lt;');

    return text;
};

/**
 * Turn `<http://example.com/>` style URLs and `<address@example.com>`
 * style e-mail addresses into links. Each link is stored through
 * hashPart().
 *
 * @param text span-level text
 * @return text with every automatic link replaced
 */
Markdown_Parser.prototype.doAutoLinks = function(text) {
    var self = this;

    // URLs. Global, so that every link in the span is converted and not
    // only the first one.
    text = text.replace(/<((https?|ftp|dict):[^'">\s]+)>/gi, function(match, address) {
        var url = self.encodeAttribute(address);
        var link = "<a href=\"" + url + "\">" + url + "</a>";
        return self.hashPart(link);
    });

    // Email addresses: <address@domain.foo>
    text = text.replace(new RegExp(
        '<' +
        '(?:mailto:)?' +
        '(' +
            '(?:' +
                '[-!#$%&\'*+/=?^_`.{|}~\\w\\x80-\\xFF]+' +
            '|' +
                '".*?"' +
            ')' +
            '\\@' +
            '(?:' +
                '[-a-z0-9\\x80-\\xFF]+(\\.[-a-z0-9\\x80-\\xFF]+)*\\.[a-z]+' +
            '|' +
                '\\[[\\d.a-fA-F:]+\\]' + // IPv4 & IPv6
            ')' +
        ')' +
        '>',
        'gi'
    ), function(match, address) {
        var link = self.encodeEmailAddress(address);
        return self.hashPart(link);
    });

    return text;
};
1483 */ 1484Markdown_Parser.prototype.encodeEmailAddress = function(addr) { 1485 if('undefined' === typeof arguments.callee.crctable) { 1486 arguments.callee.crctable = 1487 "00000000 77073096 EE0E612C 990951BA 076DC419 706AF48F E963A535 9E6495A3 " + 1488 "0EDB8832 79DCB8A4 E0D5E91E 97D2D988 09B64C2B 7EB17CBD E7B82D07 90BF1D91 " + 1489 "1DB71064 6AB020F2 F3B97148 84BE41DE 1ADAD47D 6DDDE4EB F4D4B551 83D385C7 " + 1490 "136C9856 646BA8C0 FD62F97A 8A65C9EC 14015C4F 63066CD9 FA0F3D63 8D080DF5 " + 1491 "3B6E20C8 4C69105E D56041E4 A2677172 3C03E4D1 4B04D447 D20D85FD A50AB56B " + 1492 "35B5A8FA 42B2986C DBBBC9D6 ACBCF940 32D86CE3 45DF5C75 DCD60DCF ABD13D59 " + 1493 "26D930AC 51DE003A C8D75180 BFD06116 21B4F4B5 56B3C423 CFBA9599 B8BDA50F " + 1494 "2802B89E 5F058808 C60CD9B2 B10BE924 2F6F7C87 58684C11 C1611DAB B6662D3D " + 1495 "76DC4190 01DB7106 98D220BC EFD5102A 71B18589 06B6B51F 9FBFE4A5 E8B8D433 " + 1496 "7807C9A2 0F00F934 9609A88E E10E9818 7F6A0DBB 086D3D2D 91646C97 E6635C01 " + 1497 "6B6B51F4 1C6C6162 856530D8 F262004E 6C0695ED 1B01A57B 8208F4C1 F50FC457 " + 1498 "65B0D9C6 12B7E950 8BBEB8EA FCB9887C 62DD1DDF 15DA2D49 8CD37CF3 FBD44C65 " + 1499 "4DB26158 3AB551CE A3BC0074 D4BB30E2 4ADFA541 3DD895D7 A4D1C46D D3D6F4FB " + 1500 "4369E96A 346ED9FC AD678846 DA60B8D0 44042D73 33031DE5 AA0A4C5F DD0D7CC9 " + 1501 "5005713C 270241AA BE0B1010 C90C2086 5768B525 206F85B3 B966D409 CE61E49F " + 1502 "5EDEF90E 29D9C998 B0D09822 C7D7A8B4 59B33D17 2EB40D81 B7BD5C3B C0BA6CAD " + 1503 "EDB88320 9ABFB3B6 03B6E20C 74B1D29A EAD54739 9DD277AF 04DB2615 73DC1683 " + 1504 "E3630B12 94643B84 0D6D6A3E 7A6A5AA8 E40ECF0B 9309FF9D 0A00AE27 7D079EB1 " + 1505 "F00F9344 8708A3D2 1E01F268 6906C2FE F762575D 806567CB 196C3671 6E6B06E7 " + 1506 "FED41B76 89D32BE0 10DA7A5A 67DD4ACC F9B9DF6F 8EBEEFF9 17B7BE43 60B08ED5 " + 1507 "D6D6A3E8 A1D1937E 38D8C2C4 4FDFF252 D1BB67F1 A6BC5767 3FB506DD 48B2364B " + 1508 "D80D2BDA AF0A1B4C 36034AF6 41047A60 DF60EFC3 A867DF55 316E8EEF 4669BE79 " + 1509 "CB61B38C BC66831A 
256FD2A0 5268E236 CC0C7795 BB0B4703 220216B9 5505262F " + 1510 "C5BA3BBE B2BD0B28 2BB45A92 5CB36A04 C2D7FFA7 B5D0CF31 2CD99E8B 5BDEAE1D " + 1511 "9B64C2B0 EC63F226 756AA39C 026D930A 9C0906A9 EB0E363F 72076785 05005713 " + 1512 "95BF4A82 E2B87A14 7BB12BAE 0CB61B38 92D28E9B E5D5BE0D 7CDCEFB7 0BDBDF21 " + 1513 "86D3D2D4 F1D4E242 68DDB3F8 1FDA836E 81BE16CD F6B9265B 6FB077E1 18B74777 " + 1514 "88085AE6 FF0F6A70 66063BCA 11010B5C 8F659EFF F862AE69 616BFFD3 166CCF45 " + 1515 "A00AE278 D70DD2EE 4E048354 3903B3C2 A7672661 D06016F7 4969474D 3E6E77DB " + 1516 "AED16A4A D9D65ADC 40DF0B66 37D83BF0 A9BCAE53 DEBB9EC5 47B2CF7F 30B5FFE9 " + 1517 "BDBDF21C CABAC28A 53B39330 24B4A3A6 BAD03605 CDD70693 54DE5729 23D967BF " + 1518 "B3667A2E C4614AB8 5D681B02 2A6F2B94 B40BBE37 C30C8EA1 5A05DF1B 2D02EF8D".split(' '); 1519 } 1520 var crctable = arguments.callee.crctable; 1521 function _crc32(str) { 1522 var crc = 0; 1523 crc = crc ^ (-1); 1524 for (var i = 0; i < str.length; ++i) { 1525 var y = (crc ^ str.charCodeAt(i)) & 0xff; 1526 var x = "0x" + crctable[y]; 1527 crc = (crc >>> 8) ^ x; 1528 } 1529 return (crc ^ (-1)) >>> 0; 1530 } 1531 1532 addr = "mailto:" + addr; 1533 var chars = []; 1534 var i; 1535 for(i = 0; i < addr.length; i++) { 1536 chars.push(addr.charAt(i)); 1537 } 1538 var seed = Math.floor(Math.abs(_crc32(addr) / addr.length)); // # Deterministic seed. 1539 1540 for(i = 0; i < chars.length; i++) { 1541 var c = chars[i]; 1542 var ord = c.charCodeAt(0); 1543 // Ignore non-ascii chars. 1544 if(ord < 128) { 1545 var r = (seed * (1 + i)) % 100; // Pseudo-random function. 1546 // roughly 10% raw, 45% hex, 45% dec 1547 // '@' *must* be encoded. I insist. 
1548 if(r > 90 && c != '@') { /* do nothing */ } 1549 else if(r < 45) { chars[i] = '&#x' + ord.toString(16) + ';'; } 1550 else { chars[i] = '&#' + ord.toString(10) + ';'; } 1551 } 1552 } 1553 1554 addr = chars.join(''); 1555 var text = chars.splice(7, chars.length - 1).join(''); // text without `mailto:` 1556 addr = "<a href=\"" + addr + "\">" + text + "</a>"; 1557 1558 return addr; 1559}; 1560 1561/** 1562 * Take the string $str and parse it into tokens, hashing embeded HTML, 1563 * escaped characters and handling code spans. 1564*/ 1565Markdown_Parser.prototype.parseSpan = function(str) { 1566 var output = ''; 1567 1568 var span_re = new RegExp( 1569 '(' + 1570 '\\\\' + this.escape_chars_re + 1571 '|' + 1572 // This expression is too difficult for JS: '(?<![`\\\\])' 1573 // Resoled by hand coded process. 1574 '`+' + // code span marker 1575 (this.no_markup ? '' : ( 1576 '|' + 1577 '<!--.*?-->' + // comment 1578 '|' + 1579 '<\\?.*?\\?>|<%.*?%>' + // processing instruction 1580 '|' + 1581 '<[/!$]?[-a-zA-Z0-9:_]+' + // regular tags 1582 '(?=' + 1583 '\\s' + 1584 '(?=[^"\'>]+|"[^"]*"|\'[^\']*\')*' + 1585 ')?' + 1586 '>' 1587 )) + 1588 ')' 1589 ); 1590 1591 while(1) { 1592 // 1593 // Each loop iteration seach for either the next tag, the next 1594 // openning code span marker, or the next escaped character. 1595 // Each token is then passed to handleSpanToken. 1596 // 1597 var parts = str.match(span_re); //PREG_SPLIT_DELIM_CAPTURE 1598 if(parts) { 1599 if(RegExp.leftContext) { 1600 output += RegExp.leftContext; 1601 } 1602 // Back quote but after backslash is to be ignored. 
1603 if(RegExp.lastMatch.charAt(0) == "`" && 1604 RegExp.leftContext.charAt(RegExp.leftContext.length - 1) == "\\" 1605 ) { 1606 output += RegExp.lastMatch; 1607 str = RegExp.rightContext; 1608 continue; 1609 } 1610 var r = this.handleSpanToken(RegExp.lastMatch, RegExp.rightContext); 1611 output += r[0]; 1612 str = r[1]; 1613 } 1614 else { 1615 output += str; 1616 break; 1617 } 1618 } 1619 return output; 1620}; 1621 1622 1623/** 1624 * Handle $token provided by parseSpan by determining its nature and 1625 * returning the corresponding value that should replace it. 1626*/ 1627Markdown_Parser.prototype.handleSpanToken = function(token, str) { 1628 //console.log([token, str]); 1629 switch (token.charAt(0)) { 1630 case "\\": 1631 return [this.hashPart("&#" + token.charCodeAt(1) + ";"), str]; 1632 case "`": 1633 // Search for end marker in remaining text. 1634 if (str.match(new RegExp('^([\\s\\S]*?[^`])' + this._php_preg_quote(token) + '(?!`)([\\s\\S]*)$', 'm'))) { 1635 var code = RegExp.$1; 1636 str = RegExp.$2; 1637 var codespan = this.makeCodeSpan(code); 1638 return [this.hashPart(codespan), str]; 1639 } 1640 return [token, str]; // return as text since no ending marker found. 1641 default: 1642 return [this.hashPart(token), str]; 1643 } 1644}; 1645 1646/** 1647 * Remove one level of line-leading tabs or spaces 1648 */ 1649Markdown_Parser.prototype.outdent = function(text) { 1650 return text.replace(new RegExp('^(\\t|[ ]{1,' + this.tab_width + '})', 'mg'), ''); 1651}; 1652 1653 1654//# String length function for detab. `_initDetab` will create a function to 1655//# hanlde UTF-8 if the default function does not exist. 1656//var $utf8_strlen = 'mb_strlen'; 1657 1658/** 1659 * Replace tabs with the appropriate amount of space. 1660 */ 1661Markdown_Parser.prototype.detab = function(text) { 1662 // For each line we separate the line in blocks delemited by 1663 // tab characters. 
Then we reconstruct every line by adding the 1664 // appropriate number of space between each blocks. 1665 var self = this; 1666 return text.replace(/^.*\t.*$/mg, function(line) { 1667 //$strlen = $this->utf8_strlen; # strlen function for UTF-8. 1668 // Split in blocks. 1669 var blocks = line.split("\t"); 1670 // Add each blocks to the line. 1671 line = blocks.shift(); // Do not add first block twice. 1672 for(var i = 0; i < blocks.length; i++) { 1673 var block = blocks[i]; 1674 // Calculate amount of space, insert spaces, insert block. 1675 var amount = self.tab_width - line.length % self.tab_width; 1676 line += self._php_str_repeat(" ", amount) + block; 1677 } 1678 return line; 1679 }); 1680}; 1681 1682/** 1683 * Swap back in all the tags hashed by _HashHTMLBlocks. 1684 */ 1685Markdown_Parser.prototype.unhash = function(text) { 1686 var self = this; 1687 return text.replace(/(.)\x1A[0-9]+\1/g, function(match) { 1688 return self.html_hashes[match]; 1689 }); 1690}; 1691/*-------------------------------------------------------------------------*/ 1692 1693/** 1694 * Constructor function. Initialize the parser object. 1695 */ 1696function MarkdownExtra_Parser() { 1697 1698 // Prefix for footnote ids. 1699 this.fn_id_prefix = ""; 1700 1701 // Optional title attribute for footnote links and backlinks. 1702 this.fn_link_title = MARKDOWN_FN_LINK_TITLE; 1703 this.fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE; 1704 1705 // Optional class attribute for footnote links and backlinks. 1706 this.fn_link_class = MARKDOWN_FN_LINK_CLASS; 1707 this.fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS; 1708 1709 // Predefined abbreviations. 1710 this.predef_abbr = {}; 1711 1712 // Extra variables used during extra transformations. 1713 this.footnotes = {}; 1714 this.footnotes_ordered = []; 1715 this.abbr_desciptions = {}; 1716 this.abbr_word_re = ''; 1717 1718 // Give the current footnote number. 
1719 this.footnote_counter = 1; 1720 1721 // ### HTML Block Parser ### 1722 1723 // Tags that are always treated as block tags: 1724 this.block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend'; 1725 1726 // Tags treated as block tags only if the opening tag is alone on it's line: 1727 this.context_block_tags_re = 'script|noscript|math|ins|del'; 1728 1729 // Tags where markdown="1" default to span mode: 1730 this.contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; 1731 1732 // Tags which must not have their contents modified, no matter where 1733 // they appear: 1734 this.clean_tags_re = 'script|math'; 1735 1736 // Tags that do not need to be closed. 1737 this.auto_close_tags_re = 'hr|img'; 1738 1739 // Redefining emphasis markers so that emphasis by underscore does not 1740 // work in the middle of a word. 1741 this.em_relist = [ 1742 ['' , '(?:(^|[^\\*])(\\*)(?=[^\\*])|(^|[^a-zA-Z0-9_])(_)(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'], 1743 ['*', '((?:\\S|^)[^\\*])(\\*)(?!\\*)'], 1744 ['_', '((?:\\S|^)[^_])(_)(?![a-zA-Z0-9_])'] 1745 ]; 1746 this.strong_relist = [ 1747 ['' , '(?:(^|[^\\*])(\\*\\*)(?=[^\\*])|(^|[^a-zA-Z0-9_])(__)(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'], 1748 ['**', '((?:\\S|^)[^\\*])(\\*\\*)(?!\\*)'], 1749 ['__', '((?:\\S|^)[^_])(__)(?![a-zA-Z0-9_])'] 1750 ]; 1751 this.em_strong_relist = [ 1752 ['' , '(?:(^|[^\\*])(\\*\\*\\*)(?=[^\\*])|(^|[^a-zA-Z0-9_])(___)(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'], 1753 ['***', '((?:\\S|^)[^\\*])(\\*\\*\\*)(?!\\*)'], 1754 ['___', '((?:\\S|^)[^_])(___)(?![a-zA-Z0-9_])'] 1755 ]; 1756 1757 // Add extra escapable characters before parent constructor 1758 // initialize the table. 1759 this.escape_chars += ':|'; 1760 1761 // Insert extra document, block, and span transformations. 1762 // Parent constructor will do the sorting. 
1763 this.document_gamut.push(['doFencedCodeBlocks', 5]); 1764 this.document_gamut.push(['stripFootnotes', 15]); 1765 this.document_gamut.push(['stripAbbreviations', 25]); 1766 this.document_gamut.push(['appendFootnotes', 50]); 1767 1768 this.block_gamut.push(['doFencedCodeBlocks', 5]); 1769 this.block_gamut.push(['doTables', 15]); 1770 this.block_gamut.push(['doDefLists', 45]); 1771 1772 this.span_gamut.push(['doFootnotes', 5]); 1773 this.span_gamut.push(['doAbbreviations', 70]); 1774} 1775MarkdownExtra_Parser.prototype = new Markdown_Parser(); 1776 1777/** 1778 * Setting up Extra-specific variables. 1779 */ 1780MarkdownExtra_Parser.prototype.setup = function() { 1781 this.constructor.prototype.setup.call(this); 1782 1783 this.footnotes = {}; 1784 this.footnotes_ordered = []; 1785 this.abbr_desciptions = {}; 1786 this.abbr_word_re = ''; 1787 this.footnote_counter = 1; 1788 1789 for(var abbr_word in this.predef_abbr) { 1790 var abbr_desc = this.predef_abbr[abbr_word]; 1791 if(this.abbr_word_re != '') { 1792 this.abbr_word_re += '|'; 1793 } 1794 this.abbr_word_re += this._php_preg_quote(abbr_word); // ?? str -> re? 1795 this.abbr_desciptions[abbr_word] = this._php_trim(abbr_desc); 1796 } 1797}; 1798 1799/** 1800 * Clearing Extra-specific variables. 1801 */ 1802MarkdownExtra_Parser.prototype.teardown = function() { 1803 this.footnotes = {}; 1804 this.footnotes_ordered = []; 1805 this.abbr_desciptions = {}; 1806 this.abbr_word_re = ''; 1807 1808 this.constructor.prototype.teardown.call(this); 1809}; 1810 1811 1812/** 1813 * Hashify HTML Blocks and "clean tags". 1814 * 1815 * We only want to do this for block-level HTML tags, such as headers, 1816 * lists, and tables. That's because we still want to wrap <p>s around 1817 * "paragraphs" that are wrapped in non-block-level tags, such as anchors, 1818 * phrase emphasis, and spans. The list of tags we're looking for is 1819 * hard-coded. 
/**
 * Hashify HTML Blocks and "clean tags".
 *
 * We only want to do this for block-level HTML tags, such as headers,
 * lists, and tables. That's because we still want to wrap <p>s around
 * "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 * phrase emphasis, and spans. The list of tags we're looking for is
 * hard-coded.
 *
 * This works by calling _hashHTMLBlocks_inMarkdown, which then calls
 * _hashHTMLBlocks_inHTML when it encounters block tags. When the
 * markdown="1" attribute is found within a tag, _hashHTMLBlocks_inHTML
 * calls back _hashHTMLBlocks_inMarkdown to handle the Markdown syntax
 * within the tag. These two functions are calling each other. It's
 * recursive!
 *
 * @param text Markdown text
 * @return the processed text (first element of the pair returned by
 *         _hashHTMLBlocks_inMarkdown)
 */
MarkdownExtra_Parser.prototype.hashHTMLBlocks = function(text) {
    // Delegate to the HTML-in-Markdown hasher; the "remaining text" half
    // of its result is not needed at the top level.
    var result = this._hashHTMLBlocks_inMarkdown(text);
    return result[0];
};
/**
 * Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
 *
 * *   `indent` is the number of spaces to be ignored when checking for code
 *     blocks. This is important because if we don't take the indent into
 *     account, something like this (which looks right) won't work as expected:
 *
 *     <div>
 *         <div markdown="1">
 *         Hello World.  <-- Is this a Markdown code block or text?
 *         </div>        <-- Is this a Markdown code block or a real tag?
 *     <div>
 *
 *     If you don't like this, just don't indent the tag on which
 *     you apply the markdown="1" attribute.
 *
 * *   If `enclosing_tag_re` is not empty, stops at the first unmatched closing
 *     tag with that name. Nested tags supported.
 *
 * *   If `span` is true, text inside must be treated as span. So any double
 *     newline will be replaced by a single newline so that it does not create
 *     paragraphs.
 *
 * @param text             text to parse
 * @param indent           optional, defaults to 0
 * @param enclosing_tag_re optional regex source, defaults to ''
 * @param span             optional, defaults to false
 * @return an array of that form: [ processed text , remaining text ]
 */
MarkdownExtra_Parser.prototype._hashHTMLBlocks_inMarkdown = function(text, indent, enclosing_tag_re, span) {
    if ('undefined' === typeof indent) { indent = 0; }
    if ('undefined' === typeof enclosing_tag_re) { enclosing_tag_re = ''; }
    if ('undefined' === typeof span) { span = false; }

    if (text === '') { return ['', '']; }

    var matches;

    // Regex to check for the presence of newlines around a block tag.
    var newline_before_re = /(?:^\n?|\n\n)*$/;
    var newline_after_re = new RegExp(
        '^' +                       // Start of text following the tag.
        '([ ]*<!--.*?-->)?' +       // Optional comment.
        '[ ]*\\n',                  // Must be followed by newline.
        'm'
    );

    // Regex to match any tag.
    var block_tag_re = new RegExp(
        '(' +                       // Capture whole tag.
            '</?' +                 // Any opening or closing tag.
            '(' +                   // Tag name.
                this.block_tags_re + '|' +
                this.context_block_tags_re + '|' +
                this.clean_tags_re + '|' +
                '(?!\\s)' + enclosing_tag_re +
            ')' +
            '(?:' +
                '(?=[\\s"\'/a-zA-Z0-9])' +  // Allowed characters after tag name.
                '(?=(' +
                    '".*?"|' +      // Double quotes (can contain `>`)
                    '\'.*?\'|' +    // Single quotes (can contain `>`)
                    '.+?' +         // Anything but quotes and `>`.
                '))\\3*?' +
            ')?' +
            '>' +                   // End of tag.
        '|' +
            '<!--.*?-->' +          // HTML Comment
        '|' +
            '<\\?.*?\\?>|<%.*?%>' + // Processing instruction
        '|' +
            '<!\\[CDATA\\[.*?\\]\\]>' + // CData Block
        '|' +
            // Code span marker
            '`+' +
        ( !span ? // If not in span.
        '|' +
            // Indented code block
            '(?:^[ ]*\\n|^|\\n[ ]*\\n)' +
            '[ ]{' + (indent + 4) + '}[^\\n]*\\n' +
            '(?=' +
                '(?:[ ]{' + (indent + 4) + '}[^\\n]*|[ ]*)\\n' +
            ')*' +
        '|' +
            // Fenced code block marker
            '(?:^|\\n)' +
            '[ ]{0,' + indent + '}~~~+[ ]*\\n'
        : '' ) + // End (if not in span).
        ')',
        'm'
    );

    var depth = 0;      // Current depth inside the tag tree.
    var parsed = "";    // Parsed text that will be returned.

    //
    // Loop through every tag until we find the closing tag of the parent
    // or loop until reaching the end of text if no parent tag specified.
    //
    do {
        //
        // Split the text using the first tag pattern found: text before
        // the match, the match itself, and the text after it. The pieces
        // are derived from the match result rather than from the legacy
        // RegExp.leftContext / lastMatch / rightContext statics.
        //
        var found = text.match(block_tag_re); //PREG_SPLIT_DELIM_CAPTURE
        var parts;
        if (!found) {
            parts = [text];
        }
        else {
            parts = [
                text.substring(0, found.index),
                found[0],
                text.substring(found.index + found[0].length)
            ];
        }

        // If in Markdown span mode, add an empty-string span-level hash
        // after each newline to prevent triggering any block element.
        if (span) {
            var _void = this.hashPart("", ':');
            var newline = _void + "\n";
            parts[0] = _void + parts[0].replace(/\n/g, newline) + _void;
        }

        parsed += parts[0]; // Text before current tag.

        // If end of text has been reached. Stop loop.
        if (!found) {
            text = "";
            break;
        }

        var tag = parts[1];     // Tag to handle.
        text = parts[2];        // Remaining text after current tag.
        var tag_re = this._php_preg_quote(tag); // For use in a regular expression.

        var t;
        var block_text;

        //
        // Check for: Code span marker
        //
        if (tag.charAt(0) == "`") {
            // Find corresponding end marker: the same run of backticks,
            // not adjacent to another backtick, before any blank line.
            // (The previous pattern contained a literal "/1" where a
            // back-reference was meant, so it practically never matched
            // and HTML inside inline code was parsed as real tags.)
            tag_re = this._php_preg_quote(tag);
            if (matches = text.match(new RegExp('^(?:[^\\n]|\\n(?!\\n))*?[^`]' + tag_re + '(?!`)'))) {
                // End marker found: pass text unchanged until marker.
                parsed += tag + matches[0];
                text = text.substr(matches[0].length);
            }
            else {
                // Unmatched marker: just skip it.
                parsed += tag;
            }
        }
        //
        // Check for: Fenced code block marker.
        //
        else if (tag.match(new RegExp('^\\n?[ ]{0,' + (indent + 3) + '}~'))) {
            // Fenced code block marker: find matching end marker.
            tag_re = this._php_preg_quote(this._php_trim(tag));
            if (matches = text.match(new RegExp('^(?:.*\\n)+?[ ]{0,' + indent + '}' + tag_re + '[ ]*\\n'))) {
                // End marker found: pass text unchanged until marker.
                parsed += tag + matches[0];
                text = text.substr(matches[0].length);
            }
            else {
                // No end marker: just skip it.
                parsed += tag;
            }
        }
        //
        // Check for: Indented code block.
        //
        else if (tag.charAt(0) == "\n" || tag.charAt(0) == " ") {
            // Indented code block: pass it unchanged, will be handled
            // later.
            parsed += tag;
        }
        //
        // Check for: Opening Block level tag or
        //            Opening Context Block tag (like ins and del)
        //               used as a block tag (tag is alone on its line).
        //
        else if (tag.match(new RegExp('^<(?:' + this.block_tags_re + ')\\b')) ||
            (
                tag.match(new RegExp('^<(?:' + this.context_block_tags_re + ')\\b')) &&
                parsed.match(newline_before_re) &&
                text.match(newline_after_re)
            )
        ) {
            // Need to parse tag and following text using the HTML parser.
            t = this._hashHTMLBlocks_inHTML(tag + text, this.hashBlock, true);
            block_text = t[0];
            text = t[1];

            // Make sure it stays outside of any paragraph by adding newlines.
            parsed += "\n\n" + block_text + "\n\n";
        }
        //
        // Check for: Clean tag (like script, math)
        //            HTML Comments, processing instructions.
        //
        else if (
            tag.match(new RegExp('^<(?:' + this.clean_tags_re + ')\\b')) ||
            tag.charAt(1) == '!' || tag.charAt(1) == '?'
        ) {
            // Need to parse tag and following text using the HTML parser.
            // (don't check for markdown attribute)
            t = this._hashHTMLBlocks_inHTML(tag + text, this.hashClean, false);
            block_text = t[0];
            text = t[1];

            parsed += block_text;
        }
        //
        // Check for: Tag with same name as enclosing tag.
        //
        else if (enclosing_tag_re !== '' &&
            // Same name as enclosing tag.
            tag.match(new RegExp('^</?(?:' + enclosing_tag_re + ')\\b'))
        ) {
            //
            // Increase/decrease nested tag count.
            //
            if (tag.charAt(1) == '/') { depth--; }
            else if (tag.charAt(tag.length - 2) != '/') { depth++; }

            if (depth < 0) {
                //
                // Going out of parent element. Clean up and break so we
                // return to the calling function.
                //
                text = tag + text;
                break;
            }

            parsed += tag;
        }
        else {
            parsed += tag;
        }
    } while (depth >= 0);

    return [parsed, text];
};

/**
 * Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
 *
 * *   Calls $hash_method to convert any blocks.
 * *   Stops when the first opening tag closes.
 * *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
 */
/**
 *     (it is not inside clean tags)
 *
 * Returns an array of that form: ( processed text , remaining text )
 *
 * @param {string}   text        HTML text; its first tag determines the base
 *                               tag name whose nesting depth is tracked.
 * @param {Function} hash_method Method used to hash each finished block; it
 *                               is invoked as hash_method.call(this, block).
 * @param {boolean}  md_attr     Whether the `markdown` attribute is honoured.
 * @return {Array} [parsed, remaining]. When the tags are unbalanced the
 *                 original text is returned as [first character, rest].
 */
MarkdownExtra_Parser.prototype._hashHTMLBlocks_inHTML = function(text, hash_method, md_attr) {
    if(text === '') { return ['', '']; }

    var matches;

    // Regex to match `markdown` attribute inside of a tag.
    var markdown_attr_re = new RegExp(
        '\\s*' +            // Eat whitespace before the `markdown` attribute
        'markdown' +
        '\\s*=\\s*' +
        '(?:' +
            '(["\'])' +     // $1: quote delimiter
            '(.*?)' +       // $2: attribute value
            '\\1' +         // matching delimiter
            '|' +
            '([^\\s>]*)' +  // $3: unquoted attribute value
        ')' +
        '()'                // $4: kept from the PHP original
    );

    // Regex to match any tag.
    var tag_re = new RegExp(
        '(' +                   // $1: Capture whole tag.
            '</?' +             // Any opening or closing tag.
            '[\\w:$]+' +        // Tag name.
            '(?:' +
                '(?=[\\s"\'/a-zA-Z0-9])' + // Allowed characters after tag name.
                // Emulated atomic group. The lookahead capture is group 2;
                // a reference to a non-existent group would be read as an
                // octal escape and never match.
                '(?:' +
                    '(?=(' +
                        '".*?"|' +      // Double quotes (can contain `>`)
                        '\'.*?\'|' +    // Single quotes (can contain `>`)
                        '.+?' +         // Anything but quotes and `>`.
                    '))\\2' +
                ')*?' +
            ')?' +
            '>' +               // End of tag.
        '|' +
            '<!--.*?-->' +      // HTML Comment
        '|' +
            '<\\?.*?\\?>|<%.*?%>' + // Processing instruction
        '|' +
            '<!\\[CDATA\\[.*?\\]\\]>' + // CData Block
        ')'
    );

    var original_text = text;   // Save original text in case of failure.

    var depth = 0;              // Current depth inside the tag tree.
    var block_text = "";        // Temporary text holder for current text.
    var parsed = "";            // Parsed text that will be returned.

    //
    // Get the name of the starting tag.
    // NOTE(review): the original comment calls this name regex-safe without
    // quoting, but the character class also admits `$`; confirm.
    //
    var base_tag_name_re = "";
    matches = text.match(/^<([\w:$]*)\b/);
    if(matches) {
        base_tag_name_re = matches[1];
    }

    //
    // Loop through every tag until we find the corresponding closing tag.
    //
    do {
        var found = tag_re.exec(text);
        if(found === null) {
            //
            // End of text reached with unbalanced tag(s).
            // In that case, we return original text unchanged and pass the
            // first character as filtered to prevent an infinite loop in the
            // parent function.
            //
            return [original_text.charAt(0), original_text.substr(1)];
        }

        block_text += text.slice(0, found.index);       // Text before current tag.
        var tag = found[0];                             // Tag to handle.
        text = text.slice(found.index + tag.length);    // Remaining text after current tag.

        //
        // Check for: Auto-close tag (like <hr/>)
        //            Comments and Processing Instructions.
        //
        if(tag.match(new RegExp('^</?(?:' + this.auto_close_tags_re + ')\\b')) ||
            tag.charAt(1) == '!' || tag.charAt(1) == '?')
        {
            // Just add the tag to the block as if it was text.
            block_text += tag;
            continue;
        }

        //
        // Increase/decrease nested tag count. Only do so if
        // the tag's name matches the base tag's.
        //
        if(tag.match(new RegExp('^</?' + base_tag_name_re + '\\b'))) {
            if(tag.charAt(1) == '/') { depth--; }
            else if(tag.charAt(tag.length - 2) != '/') { depth++; }
        }

        //
        // Check for `markdown="1"` attribute and handle it.
        //
        var attr_m = md_attr ? tag.match(markdown_attr_re) : null;
        // Only one of $2/$3 takes part in a match; the other is undefined
        // and must not be concatenated as the string "undefined".
        var mode = attr_m ? (attr_m[2] || '') + (attr_m[3] || '') : '';

        if(!attr_m || !mode.match(/^1|block|span$/)) {
            block_text += tag;
            continue;
        }

        // Remove `markdown` attribute from opening tag.
        tag = tag.replace(markdown_attr_re, '');

        // Check if text inside this tag must be parsed in span mode.
        this.mode = mode;
        var span_mode = !!(mode == 'span' || mode != 'block' &&
            tag.match(new RegExp('^<(?:' + this.contain_span_tags_re + ')\\b')));

        // Calculate indent before tag.
        var indent = 0;
        matches = block_text.match(/(?:^|\n)( *?)(?! ).*?$/);
        if(matches) {
            indent = matches[1].length;
        }

        // End preceding block with this tag.
        block_text += tag;
        parsed += hash_method.call(this, block_text);

        // Get enclosing tag name for the Markdown parser.
        matches = tag.match(/^<([\w:$]*)\b/);
        var tag_name_re = matches[1];

        // Parse the content using the HTML-in-Markdown parser.
        var t = this._hashHTMLBlocks_inMarkdown(text, indent, tag_name_re, span_mode);
        block_text = t[0];
        text = t[1];

        // Outdent markdown text (every line, up to `indent` spaces).
        if(indent > 0) {
            block_text = block_text.replace(new RegExp('^[ ]{1,' + indent + '}', 'mg'), "");
        }

        // Append tag content to parsed text.
        if(!span_mode) { parsed += "\n\n" + block_text + "\n\n"; }
        else { parsed += block_text; }

        // Start over a new block.
        block_text = "";

    } while(depth > 0);

    //
    // Hash last block text that wasn't processed inside the loop.
    //
    parsed += hash_method.call(this, block_text);

    return [parsed, text];
};


/**
 * Called whenever a tag must be hashed. When a function inserts a "clean" tag
 * in the text, it passes through this function and is automatically escaped,
 * blocking invalid nested overlap.
 *
 * @param {string} text Text to hash.
 * @return {string} Result of hashPart(text, 'C').
 */
MarkdownExtra_Parser.prototype.hashClean = function(text) {
    return this.hashPart(text, 'C');
};


/**
 * Redefined to add id attribute support.
 *
 * @param {string} text Markdown text.
 * @return {string} Text with setext and atx headers replaced by hashed
 *                  <h1>..<h6> blocks.
 */
MarkdownExtra_Parser.prototype.doHeaders = function(text) {
    var self = this;

    // Build the ` id="..."` attribute, or nothing when no id was given.
    function id_attribute(id) {
        if('undefined' === typeof id || id == "") { return ""; }
        return " id=\"" + id + "\"";
    }

    // Render one header and hash it as a block.
    function render_header(level, id, content) {
        var attr = id_attribute(id);
        var block = "<h" + level + attr + ">" + self.runSpanGamut(content) + "</h" + level + ">";
        return "\n" + self.hashBlock(block) + "\n\n";
    }

    // Setext-style headers:
    //    Header 1  {#header1}
    //    ========
    //
    //    Header 2  {#header2}
    //    --------

    text = text.replace(new RegExp(
        '(^.+?)' +                                  // $1: Header text
        '(?:[ ]+\\{\\#([-_:a-zA-Z0-9]+)\\})?' +     // $2: Id attribute
        '[ ]*\\n(=+|-+)[ ]*\\n+',                   // $3: Header footer
        'mg'
    ), function(match, span, id, line) {
        // A single dash under a line starting with "- " is a list item.
        if(line == '-' && span.match(/^- /)) {
            return match;
        }
        return render_header(line.charAt(0) == '=' ? 1 : 2, id, span);
    });

    // atx-style headers:
    //    # Header 1        {#header1}
    //    ## Header 2       {#header2}
    //    ## Header 2 with closing hashes ##  {#header3}
    //    ...
    //    ###### Header 6   {#header2}

    text = text.replace(new RegExp(
        '^(\\#{1,6})' +                             // $1 = string of #'s
        '[ ]*' +
        '(.+?)' +                                   // $2 = Header text
        '[ ]*' +
        '\\#*' +                                    // optional closing #'s (not counted)
        '(?:[ ]+\\{\\#([-_:a-zA-Z0-9]+)\\})?' +     // id attribute
        '[ ]*' +                                    // trailing spaces after hashes / id
        '\\n+',
        'mg'
    ), function(match, hashes, span, id) {
        return render_header(hashes.length, id, span);
    });

    return text;
};

/**
 * Form HTML tables.
 *
 * @param {string} text Markdown text.
 * @return {string} Text with pipe tables replaced by hashed <table> blocks.
 */
MarkdownExtra_Parser.prototype.doTables = function(text) {
    var self = this;

    var less_than_tab = this.tab_width - 1;

    var _doTable_callback = function(match, head, underline, content) {
        // Remove any trailing pipes for each line (all rows, hence `g`).
        head = head.replace(/[|] *$/mg, '');
        underline = underline.replace(/[|] *$/mg, '');
        content = content.replace(/[|] *$/mg, '');

        var attr = [];

        // Reading alignment from header underline.
        var separators = underline.split(/[ ]*[|][ ]*/);
        var n;
        for(n = 0; n < separators.length; n++) {
            var s = separators[n];
            if(s.match(/^ *-+: *$/)) { attr[n] = ' align="right"'; }
            else if(s.match(/^ *:-+: *$/)) { attr[n] = ' align="center"'; }
            else if(s.match(/^ *:-+ *$/)) { attr[n] = ' align="left"'; }
            else { attr[n] = ''; }
        }

        // Parsing span elements, including code spans, character escapes,
        // and inline HTML tags, so that pipes inside those get ignored.
        head = self.parseSpan(head);
        var headers = head.split(/ *[|] */);
        var col_count = headers.length;

        // Write column headers. A column without an underline cell has no
        // alignment attribute.
        var html = "<table>\n";
        html += "<thead>\n";
        html += "<tr>\n";
        for(n = 0; n < headers.length; n++) {
            html += " <th" + (attr[n] || '') + ">" +
                self.runSpanGamut(self._php_trim(headers[n])) + "</th>\n";
        }
        html += "</tr>\n";
        html += "</thead>\n";

        // Split content by row.
        var rows = self._php_trim(content, "\n").split("\n");

        html += "<tbody>\n";
        for(var i = 0; i < rows.length; i++) {
            // Parsing span elements, including code spans, character escapes,
            // and inline HTML tags, so that pipes inside those get ignored.
            var row = self.parseSpan(rows[i]);

            // Split row by cell and pad short rows with empty cells.
            var row_cells = row.split(/ *[|] */, col_count);
            while(row_cells.length < col_count) { row_cells.push(''); }

            html += "<tr>\n";
            for(n = 0; n < row_cells.length; n++) {
                html += " <td" + (attr[n] || '') + ">" +
                    self.runSpanGamut(self._php_trim(row_cells[n])) + "</td>\n";
            }
            html += "</tr>\n";
        }
        html += "</tbody>\n";
        html += "</table>";

        return self.hashBlock(html) + "\n";
    };

    text = this.__wrapSTXETX__(text);

    //
    // Find tables with leading pipe.
    //
    //    | Header 1 | Header 2
    //    | -------- | --------
    //    | Cell 1   | Cell 2
    //    | Cell 3   | Cell 4
    //
    text = text.replace(new RegExp(
        '^' +                                   // Start of a line
        '[ ]{0,' + less_than_tab + '}' +        // Allowed whitespace.
        '[|]' +                                 // Optional leading pipe (present)
        '(.+)\\n' +                             // $1: Header row (at least one pipe)

        '[ ]{0,' + less_than_tab + '}' +        // Allowed whitespace.
        '[|]([ ]*[-:]+[-| :]*)\\n' +            // $2: Header underline

        '(' +                                   // $3: Cells
            '(?:' +
                '[ ]*' +                        // Allowed whitespace.
                '[|].*\\n' +                    // Row content.
            ')*' +
        ')' +
        '(?=\\n|\\x03)',                        // Stop at final double newline.
        'mg'
    ), function(match, head, underline, content) {
        // Remove leading pipe for each row (all rows, hence `g`).
        content = content.replace(/^ *[|]/mg, '');

        return _doTable_callback(match, head, underline, content);
    });

    //
    // Find tables without leading pipe.
    //
    //    Header 1 | Header 2
    //    -------- | --------
    //    Cell 1   | Cell 2
    //    Cell 3   | Cell 4
    //
    text = text.replace(new RegExp(
        '^' +                                   // Start of a line
        '[ ]{0,' + less_than_tab + '}' +        // Allowed whitespace.
        '(\\S.*[|].*)\\n' +                     // $1: Header row (at least one pipe)

        '[ ]{0,' + less_than_tab + '}' +        // Allowed whitespace.
        '([-:]+[ ]*[|][-| :]*)\\n' +            // $2: Header underline

        '(' +                                   // $3: Cells
            '(?:' +
                '.*[|].*\\n' +                  // Row content
            ')*' +
        ')' +
        '(?=\\n|\\x03)',                        // Stop at final double newline.
        'mg'
    ), _doTable_callback);

    text = this.__unwrapSTXETX__(text);

    return text;
};

/**
 * Form HTML definition lists.
 *
 * @param {string} text Markdown text.
 * @return {string} Text with definition lists replaced by hashed <dl> blocks.
 */
MarkdownExtra_Parser.prototype.doDefLists = function(text) {
    var self = this;

    var less_than_tab = this.tab_width - 1;

    // Re-usable pattern to match any entire dl list:
    var whole_list_re = '(?:' +
        '(' +                                       // $1 = whole list
            '(' +                                   // $2
                '[ ]{0,' + less_than_tab + '}' +
                '((?:[ \\t]*\\S.*\\n)+)' +          // $3 = defined term
                                                    // [porting note] Original regex from PHP is
                                                    // (?>.*\S.*\n), which matches a line with at
                                                    // least one non-space character. Changing the
                                                    // first .* to [ \t]* stops unnecessary
                                                    // backtracking hence improves performance
                '\\n?' +
                '[ ]{0,' + less_than_tab + '}:[ ]+' +   // colon starting definition
            ')' +
            '([\\s\\S]+?)' +
            '(' +                                   // $4
                '(?=\\x03)' +                       // \z (ETX marks the end of text)
                '|' +
                '(?=' +                             // [porting note] Our regex will consume leading
                                                    // newline characters so we will leave the newlines
                                                    // here for the next definition
                    '\\n{2,}' +
                    '(?=\\S)' +
                    '(?!' +                         // Negative lookahead for another term
                        '[ ]{0,' + less_than_tab + '}' +
                        '(?:\\S.*\\n)+?' +          // defined term
                        '\\n?' +
                        '[ ]{0,' + less_than_tab + '}:[ ]+' +   // colon starting definition
                    ')' +
                    '(?!' +                         // Negative lookahead for another definition
                        '[ ]{0,' + less_than_tab + '}:[ ]+' +   // colon starting definition
                    ')' +
                ')' +
            ')' +
        ')' +
    ')'; // mx

    text = this.__wrapSTXETX__(text);
    text = text.replace(new RegExp(
        '(\\x02\\n?|\\n\\n)' +
        whole_list_re, 'mg'
    ), function(match, pre, list) {
        // [porting note] The list is $2 here because $1 preserves the
        // leading STX / blank line that the pattern consumed.
        var result = self._php_trim(self.processDefListItems(list));
        result = "<dl>\n" + result + "\n</dl>";
        return pre + self.hashBlock(result) + "\n\n";
    });
    text = this.__unwrapSTXETX__(text);

    return text;
};

/**
 * Process the contents of a single definition list, splitting it
 * into individual term and definition list items.
 *
 * @param {string} list_str Source text of one definition list.
 * @return {string} The list rendered as <dt> / <dd> items.
 */
MarkdownExtra_Parser.prototype.processDefListItems = function(list_str) {
    var self = this;

    var less_than_tab = this.tab_width - 1;

    list_str = this.__wrapSTXETX__(list_str);

    // trim trailing blank lines (ETX marks the end of text):
    list_str = list_str.replace(/\n{2,}(?=\x03)/, "\n");

    // Process definition terms.
    list_str = list_str.replace(new RegExp(
        '(\\x02\\n?|\\n\\n+)' +                 // leading line = $1
        '(' +                                   // definition terms = $2
            '[ ]{0,' + less_than_tab + '}' +    // leading whitespace
            '(?![:][ ]|[ ])' +                  // negative lookahead for a definition
                                                //   mark (colon) or more whitespace.
            '(?:\\S.*\\n)+?' +                  // actual term (not whitespace).
        ')' +
        '(?=\\n?[ ]{0,3}:[ ])',                 // lookahead for following line feed
                                                //   with a definition mark.
        'mg'
    ), function(match, pre, terms_str) {
        var terms = self._php_trim(terms_str).split("\n");
        var html = '';
        for(var i = 0; i < terms.length; i++) {
            var term = self.runSpanGamut(self._php_trim(terms[i]));
            html += "\n<dt>" + term + "</dt>";
        }
        return html + "\n";
    });

    // Process actual definitions.
    list_str = list_str.replace(new RegExp(
        '\\n(\\n+)?' +                          // leading line = $1
        '(' +                                   // marker space = $2
            '[ ]{0,' + less_than_tab + '}' +    // whitespace before colon
            '[:][ ]+' +                         // definition mark (colon)
        ')' +
        '([\\s\\S]+?)' +                        // definition text = $3
                                                // [porting note] Maybe no trailing
                                                // newlines in our version, changed the
                                                // following line from \n+ to \n*.
        '(?=\\n*' +                             // stop at next definition mark,
            '(?:' +                             // next term or end of text
                '\\n[ ]{0,' + less_than_tab + '}[:][ ]|' +  // [porting note] do not match
                                                            // colon in the middle of a line
                '<dt>|\\x03' +                  // \z
            ')' +
        ')',
        'mg'
    ), function(match, leading_line, marker_space, def) {
        if(leading_line || def.match(/\n{2,}/)) {
            // Replace marker with the appropriate whitespace indentation
            def = self._php_str_repeat(' ', marker_space.length) + def;
            def = self.runBlockGamut(self.outdent(def + "\n\n"));
            def = "\n" + def + "\n";
        }
        else {
            def = self._php_rtrim(def);
            def = self.runSpanGamut(self.outdent(def));
        }

        return "\n<dd>" + def + "</dd>\n";
    });

    list_str = this.__unwrapSTXETX__(list_str);

    return list_str;
};

/**
 * Adding the fenced code block syntax to regular Markdown:
 *
 * ~~~
 * Code block
 * ~~~
 *
 * @param {string} text Markdown text.
 * @return {string} Text with fenced blocks replaced by hashed
 *                  <pre><code> blocks.
 */
MarkdownExtra_Parser.prototype.doFencedCodeBlocks = function(text) {
    var self = this;

    text = this.__wrapSTXETX__(text);
    text = text.replace(new RegExp(
        '(?:\\n|\\x02)' +
        // 1: Opening marker
        '(' +
            '~{3,}' +                   // Marker: three tildes or more.
        ')' +
        '[ ]*\\n' +                     // Whitespace and newline following marker.

        // 2: Content
        '(' +
            '(?:' +
                '(?=(' +
                    '(?!\\1[ ]*\\n)' +  // Not a closing marker.
                    '.*\\n+' +
                '))\\3' +
            ')+' +
        ')' +

        // Closing marker.
        '\\1[ ]*\\n',
        "mg"
    ), function(match, m1, codeblock) {
        codeblock = self._php_htmlspecialchars_ENT_NOQUOTES(codeblock);
        // Leading blank lines become explicit line breaks.
        codeblock = codeblock.replace(/^\n+/, function(newlines) {
            return self._php_str_repeat("<br" + self.empty_element_suffix, newlines.length);
        });
        codeblock = "<pre><code>" + codeblock + "</code></pre>";
        return "\n\n" + self.hashBlock(codeblock) + "\n\n";
    });
    text = this.__unwrapSTXETX__(text);

    return text;
};

/**
 * Wrap paragraphs in <p> tags and unhash the text.
 *
 * @param {string} text String to process with html <p> tags.
 * @return {string} Text with paragraphs wrapped and hashes restored.
 */
MarkdownExtra_Parser.prototype.formParagraphs = function(text) {

    // Strip leading and trailing lines:
    text = this.__wrapSTXETX__(text);
    text = text.replace(/(?:\x02)\n+|\n+(?:\x03)/g, "");
    text = this.__unwrapSTXETX__(text);

    var grafs = text.split(/\n{2,}/m);
    var result = [];

    //
    // Wrap <p> tags and unhashify HTML blocks
    //
    for(var i = 0; i < grafs.length; i++) {
        var value = grafs[i];
        if(value == "") {
            // [porting note]
            // Replacement for PREG_SPLIT_NO_EMPTY: empty pieces are dropped.
            continue;
        }
        value = this._php_trim(this.runSpanGamut(value));

        // Check if this should be enclosed in a paragraph.
        // Clean tag hashes & block tag hashes are left alone.
        var is_p = !value.match(/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/);

        if(is_p) {
            value = "<p>" + value + "</p>";
        }
        result.push(value);
    }

    // Join grafs in one text, then unhash HTML tags.
    text = result.join("\n\n");

    // Finish by removing any tag hashes still present in the text.
    text = this.unhash(text);

    return text;
};

// ### Footnotes

/**
 * Strips footnote definitions from text and stores their (outdented) text
 * in this.footnotes, keyed by fn_id_prefix + note id.
 *
 * @param {string} text Markdown text.
 * @return {string} Text without the footnote definitions.
 */
MarkdownExtra_Parser.prototype.stripFootnotes = function(text) {
    var self = this;

    var less_than_tab = this.tab_width - 1;

    // Footnote definitions are in the form: [^id]: text
    var footnote_def_re = new RegExp(
        '^[ ]{0,' + less_than_tab + '}\\[\\^(.+?)\\][ ]?:' + // note_id = $1
        '[ ]*' +
        '\\n?' +                            // maybe *one* newline
        '(' +                               // text = $2 (no blank lines allowed)
            '(?:' +
                '.+' +                      // actual text
            '|' +
                '\\n' +                     // newlines but
                '(?!\\[\\^.+?\\]:\\s)' +    // negative lookahead for footnote marker.
                '(?!\\n+[ ]{0,3}\\S)' +     // ensure line is not blank and followed
                                            // by non-indented content
            ')*' +
        ')',
        "mg"
    );

    return text.replace(footnote_def_re, function(match, m1, m2) {
        self.footnotes[self.fn_id_prefix + m1] = self.outdent(m2);
        return ''; // String that will replace the block
    });
};

/**
 * Replace footnote references in text [^id] with a special text-token
 * which will be replaced by the actual footnote marker in appendFootnotes.
 *
 * @param {string} text Span-level text.
 * @return {string} Text with references tokenised (unchanged inside anchors).
 */
MarkdownExtra_Parser.prototype.doFootnotes = function(text) {
    if(this.in_anchor) {
        return text;
    }
    return text.replace(/\[\^(.+?)\]/g, "F\x1Afn:$1\x1A:");
};

/**
 * Append footnote list to text.
 *
 * @param {string} text HTML text containing footnote tokens.
 * @return {string} Text with tokens turned into <sup> links and, when any
 *                  footnote was referenced, a footnotes <div> appended.
 */
MarkdownExtra_Parser.prototype.appendFootnotes = function(text) {
    var self = this;
    var footnote_token_re = /F\x1Afn:(.*?)\x1A:/g;

    var _appendFootnotes_callback = function(match, m1) {
        var node_id = self.fn_id_prefix + m1;

        // Create footnote marker only if it has a corresponding footnote *and*
        // the footnote hasn't been used by another marker. Own-property check
        // so ids such as "constructor" are not taken from the prototype chain.
        if(!Object.prototype.hasOwnProperty.call(self.footnotes, node_id)) {
            return "[^" + m1 + "]";
        }

        // Transfer footnote content to the ordered list.
        self.footnotes_ordered.push([node_id, self.footnotes[node_id]]);
        delete self.footnotes[node_id];

        var num = self.footnote_counter++;
        var attr = " rel=\"footnote\"";
        if(self.fn_link_class != "") {
            attr += " class=\"" + self.encodeAttribute(self.fn_link_class) + "\"";
        }
        if(self.fn_link_title != "") {
            attr += " title=\"" + self.encodeAttribute(self.fn_link_title) + "\"";
        }

        attr = attr.replace(/%%/g, num);
        node_id = self.encodeAttribute(node_id);

        return "<sup id=\"fnref:" + node_id + "\">" +
            "<a href=\"#fn:" + node_id + "\"" + attr + ">" + num + "</a>" +
            "</sup>";
    };

    text = text.replace(footnote_token_re, _appendFootnotes_callback);

    if(this.footnotes_ordered.length > 0) {
        text += "\n\n";
        text += "<div class=\"footnotes\">\n";
        text += "<hr" + this.empty_element_suffix + "\n";
        text += "<ol>\n\n";

        // Attribute template shared by all backlinks; `%%` stands for the
        // footnote number and is filled in per footnote below.
        var attr_template = " rev=\"footnote\"";
        if(this.fn_backlink_class != "") {
            attr_template += " class=\"" + this.encodeAttribute(this.fn_backlink_class) + "\"";
        }
        if(this.fn_backlink_title != "") {
            attr_template += " title=\"" + this.encodeAttribute(this.fn_backlink_title) + "\"";
        }
        var num = 0;

        // Footnotes referenced from inside other footnotes are appended to
        // the queue while it is being drained.
        while(this.footnotes_ordered.length > 0) {
            var head = this.footnotes_ordered.shift();
            var note_id = head[0];
            var footnote = head[1];

            footnote += "\n"; // Need to append newline before parsing.
            footnote = this.runBlockGamut(footnote + "\n");
            footnote = footnote.replace(footnote_token_re, _appendFootnotes_callback);

            // Fill the placeholder on a copy so later footnotes still get
            // their own number.
            var attr = attr_template.replace(/%%/g, ++num);
            note_id = this.encodeAttribute(note_id);

            // Add backlink to last paragraph; create new paragraph if needed.
            var backlink = "<a href=\"#fnref:" + note_id + "\"" + attr + ">↩</a>";
            if(footnote.match(/<\/p>$/)) {
                footnote = footnote.substr(0, footnote.length - 4) + " " + backlink + "</p>";
            } else {
                footnote += "\n\n<p>" + backlink + "</p>";
            }

            text += "<li id=\"fn:" + note_id + "\">\n";
            text += footnote + "\n";
            text += "</li>\n\n";
        }

        text += "</ol>\n";
        text += "</div>";
    }
    return text;
};

//### Abbreviations ###

/**
 * Strips abbreviations from text, stores titles in hash references.
 *
 * @param {string} text Markdown text.
 * @return {string} Text without the abbreviation definitions.
 */
MarkdownExtra_Parser.prototype.stripAbbreviations = function(text) {
    var self = this;

    var less_than_tab = this.tab_width - 1;

    // Abbreviation definitions are in the form: *[id]: description
    // `g` is required so every definition is stripped, not only the first.
    text = text.replace(new RegExp(
        '^[ ]{0,' + less_than_tab + '}\\*\\[(.+?)\\][ ]?:' +    // abbr_id = $1
        '(.*)',                                                 // text = $2 (no blank lines allowed)
        "mg"
    ), function(match, abbr_word, abbr_desc) {
        if(self.abbr_word_re != '') {
            self.abbr_word_re += '|';
        }
        self.abbr_word_re += self._php_preg_quote(abbr_word);
        self.abbr_desciptions[abbr_word] = self._php_trim(abbr_desc);
        return ''; // String that will replace the block
    });
    return text;
};

/**
 * Find defined abbreviations in text and wrap them in <abbr> elements.
 */
/**
 * Each whole-word occurrence of a defined abbreviation is replaced by a
 * hashed <abbr> element; the title attribute carries the description when
 * one was given.
 *
 * @param {string} text Span-level text.
 * @return {string} Text with abbreviations wrapped, or the input unchanged
 *                  when no abbreviation has been defined.
 */
MarkdownExtra_Parser.prototype.doAbbreviations = function(text) {
    var self = this;

    if(!this.abbr_word_re) {
        return text;
    }

    // cannot use the /x modifier because abbr_word_re may
    // contain significant spaces:
    return text.replace(new RegExp(
        '(^|[^\\w\\x1A])' +                 // $1: character before the word
        '(' + this.abbr_word_re + ')' +     // $2: the abbreviation
        '(?![\\w\\x1A])',
        'g'                                 // wrap every occurrence
    ), function(match, prev, abbr) {
        // Own-property check so words such as "constructor" are not
        // looked up on the prototype chain.
        if(!Object.prototype.hasOwnProperty.call(self.abbr_desciptions, abbr)) {
            return match;
        }

        var desc = self.abbr_desciptions[abbr];
        var html;
        if(!desc || desc == "") {
            html = "<abbr>" + abbr + "</abbr>";
        } else {
            desc = self.encodeAttribute(desc);
            html = "<abbr title=\"" + desc + "\">" + abbr + "</abbr>";
        }
        // $1 stands in for a lookbehind: the character it consumed has to
        // be put back in front of the element.
        return prev + self.hashPart(html);
    });
};


/**
 * Export to Node.js
 *
 * Top-level `this` is the exports object in a CommonJS module and the global
 * object in a classic script. It is undefined in an ES module, where the
 * assignments are skipped instead of throwing.
 */
if(typeof this !== 'undefined') {
    this.Markdown = Markdown;
    this.Markdown_Parser = Markdown_Parser;
    this.MarkdownExtra_Parser = MarkdownExtra_Parser;
}