s around // "paragraphs" that are wrapped in non-block-level tags, such as anchors, // phrase emphasis, and spans. The list of tags we're looking for is // hard-coded: // // * List "a" is made of tags which can be both inline or block-level. // These will be treated block-level when the start tag is alone on // its line, otherwise they're not matched here and will be taken as // inline later. // * List "b" is made of tags which are always block-level; var block_tags_a_re = 'ins|del'; var block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|' + 'script|noscript|form|fieldset|iframe|math'; // Regular expression for the content of a block tag. var nested_tags_level = 4; var attr = '(?:' + // optional tag attributes '\\s' + // starts with whitespace '(?:' + '[^>"/]+' + // text outside quotes '|' + '/+(?!>)' + // slash not followed by ">" '|' + '"[^"]*"' + // text inside double quotes (tolerate ">") '|' + '\'[^\']*\'' + // text inside single quotes (tolerate ">") ')*' + ')?'; var content = this._php_str_repeat( '(?:' + '[^<]+' + // content without tag '|' + '<\\2' + // nested opening tag attr + // attributes '(?:' + '/>' + '|' + '>', nested_tags_level ) + // end of opening tag '.*?' + // last level nested tag content this._php_str_repeat( '\\2\\s*>' + // closing nested tag ')' + '|' + '<(?!/\\2\\s*>)' + // other tags with a different name ')*', nested_tags_level ); var content2 = content.replace('\\2', '\\3'); // First, look for nested blocks, e.g.: //
` blocks.
*/
Markdown_Parser.prototype.doCodeBlocks = function(text) {
    // Replaces every indented code block in `text` with a hashed
    // <pre><code> block and returns the resulting text.
    var self = this;

    var code_block_re = new RegExp(
        '(?:^|\\n\\n|(?=\\x02)\\n)?' +
        '(' + // $1 = the code block -- one or more lines, starting with a space/tab
            '(?:' +
                // Lookahead + backreference: once a line is captured the
                // engine cannot backtrack into it.
                '(?=(' +
                    '[ ]{' + this.tab_width + ',}' + // Lines must start with a tab-width of spaces
                    '.*\\n+' +
                '))\\2' +
            ')+' +
        ')' +
        // Lookahead for non-space at line-start, or end of doc (\x03 sentinel)
        '((?=^[ ]{0,' + this.tab_width + '}\\S)|(?:\\n*(?=\\x03)))',
        'mg'
    );

    // The pattern refers to \x02 / \x03, so the text is bracketed first
    // (presumably __wrapSTXETX__ adds exactly those sentinels -- confirm).
    text = this.__wrapSTXETX__(text);
    text = text.replace(code_block_re, function(match, codeblock) {
        codeblock = self.outdent(codeblock);
        codeblock = self._php_htmlspecialchars_ENT_NOQUOTES(codeblock);

        // Trim leading and trailing newlines.
        codeblock = codeblock.replace(/^\n+|\n+$/g, '');

        codeblock = "<pre><code>" + codeblock + "\n</code></pre>";
        return "\n\n" + self.hashBlock(codeblock) + "\n\n";
    });
    text = this.__unwrapSTXETX__(text);
    return text;
};
/**
 * Create a code span markup for `code`. Called from handleSpanToken.
 *
 * The code is trimmed and HTML-escaped (via the parser's own helpers),
 * wrapped in <code>...</code>, and the result is passed through hashPart.
 *
 * @param {string} code  raw text found between the backtick markers
 * @returns {string} whatever hashPart returns for the <code> element
 */
Markdown_Parser.prototype.makeCodeSpan = function(code) {
    code = this._php_htmlspecialchars_ENT_NOQUOTES(this._php_trim(code));
    return this.hashPart("<code>" + code + "</code>");
};
/**
 * Pre-compile, for every combination of open emphasis / strong marker,
 * the regular expression used to find the next emphasis token.
 *
 * Results are stored in this.em_strong_prepared_relist under the key
 * 'rx_' + emMarker + strongMarker.
 */
Markdown_Parser.prototype.prepareItalicsAndBold = function() {
    var parser = this;
    var prepared = this.em_strong_prepared_relist = {};

    this.em_relist.forEach(function(emEntry) {
        var emMarker = emEntry[0];
        var emPattern = emEntry[1];

        parser.strong_relist.forEach(function(strongEntry) {
            var strongMarker = strongEntry[0];
            var strongPattern = strongEntry[1];

            // Combined em+strong patterns come first, but only those whose
            // marker equals the two open markers glued together.
            var alternatives = parser.em_strong_relist
                .filter(function(comboEntry) {
                    return emMarker + strongMarker == comboEntry[0];
                })
                .map(function(comboEntry) {
                    return comboEntry[1];
                });
            alternatives.push(emPattern, strongPattern);

            // One master expression: a single capture group around all
            // allowed token patterns.
            prepared['rx_' + emMarker + strongMarker] =
                new RegExp('(' + alternatives.join('|') + ')');
        });
    });
};
/**
 * Convert emphasis markers in `text` into <em> and <strong> elements.
 *
 * The text is scanned token by token with the expressions built by
 * prepareItalicsAndBold(); which expression is used depends on the
 * markers that are currently open (`em` / `strong`). Two parallel stacks
 * hold the open markers and the text collected since each was opened.
 * Finished spans are run through runSpanGamut and hashPart.
 *
 * @param {string} text  span-level text
 * @returns {string} text with emphasis converted; unmatched markers are
 *                   put back literally
 */
Markdown_Parser.prototype.doItalicsAndBold = function(text) {
    var em = '';              // open emphasis marker, '' when none
    var strong = '';          // open strong marker, '' when none
    var tree_char_em = false; // true right after an opening three-char marker
    var text_stack = [''];
    var token_stack = [];
    var token = '';
    var tag, span, unwound, i, mg;

    while (true) {
        //
        // Get prepared regular expression for searching emphasis tokens
        // in current context.
        //
        var token_re = this.em_strong_prepared_relist['rx_' + em + strong];

        //
        // Each loop iteration searches for the next emphasis token.
        //
        var parts = text.match(token_re);
        if (parts) {
            // Each alternative captures (text before marker, marker). When
            // the text before the marker is empty the whole match is the
            // marker, which is what parts[1] holds.
            var pre = "";
            var marker = parts[1];
            for (mg = 2; mg < parts.length; mg += 2) {
                if ('undefined' !== typeof parts[mg] && parts[mg] != '') {
                    pre = parts[mg];
                    marker = parts[mg + 1];
                    break;
                }
            }
            text_stack[0] += text.slice(0, parts.index) + pre;
            token = marker;
            text = text.slice(parts.index + parts[0].length);
        }
        else {
            text_stack[0] += text;
            token = '';
            text = '';
        }

        if (token == '') {
            // Reached end of text span: empty stack without emitting
            // any more emphasis.
            while (token_stack.length > 0 && token_stack[0].length > 0) {
                text_stack[1] += token_stack.shift();
                unwound = text_stack.shift();
                text_stack[0] += unwound;
            }
            break;
        }

        var token_len = token.length;
        if (tree_char_em) {
            // Reached closing marker while inside a three-char emphasis.
            if (token_len == 3) {
                // Three-char closing marker, close em and strong.
                token_stack.shift();
                span = text_stack.shift();
                span = this.runSpanGamut(span);
                span = "<strong><em>" + span + "</em></strong>";
                text_stack[0] += this.hashPart(span);
                em = '';
                strong = '';
            } else {
                // Other closing marker: close one em or strong and
                // change current token state to match the other.
                token_stack[0] = this._php_str_repeat(token.charAt(0), 3 - token_len);
                tag = token_len == 2 ? "strong" : "em";
                span = text_stack[0];
                span = this.runSpanGamut(span);
                span = "<" + tag + ">" + span + "</" + tag + ">";
                // Plain assignment: the span was read from text_stack[0]
                // without shifting, so it replaces that entry.
                text_stack[0] = this.hashPart(span);
                if (tag == 'strong') { strong = ''; } else { em = ''; }
            }
            tree_char_em = false;
        } else if (token_len == 3) {
            if (em != '') {
                // Reached closing marker for both em and strong: close
                // the two innermost markers in turn.
                for (i = 0; i < 2; ++i) {
                    var shifted_token = token_stack.shift();
                    tag = shifted_token.length == 2 ? "strong" : "em";
                    span = text_stack.shift();
                    span = this.runSpanGamut(span);
                    span = "<" + tag + ">" + span + "</" + tag + ">";
                    // Append: the entry now on top belongs to the
                    // enclosing context and already holds its text.
                    text_stack[0] += this.hashPart(span);
                    if (tag == 'strong') { strong = ''; } else { em = ''; }
                }
            } else {
                // Reached opening three-char emphasis marker. Push on token
                // stack; will be handled by the special condition above.
                em = token.charAt(0);
                strong = em + em;
                token_stack.unshift(token);
                text_stack.unshift('');
                tree_char_em = true;
            }
        } else if (token_len == 2) {
            if (strong != '') {
                // Unwind any dangling emphasis marker:
                if (token_stack[0].length == 1) {
                    text_stack[1] += token_stack.shift();
                    unwound = text_stack.shift();
                    text_stack[0] += unwound;
                }
                // Closing strong marker:
                token_stack.shift();
                span = text_stack.shift();
                span = this.runSpanGamut(span);
                span = "<strong>" + span + "</strong>";
                text_stack[0] += this.hashPart(span);
                strong = '';
            } else {
                token_stack.unshift(token);
                text_stack.unshift('');
                strong = token;
            }
        } else {
            // Here token_len == 1
            if (em != '') {
                if (token_stack[0].length == 1) {
                    // Closing emphasis marker:
                    token_stack.shift();
                    span = text_stack.shift();
                    span = this.runSpanGamut(span);
                    span = "<em>" + span + "</em>";
                    text_stack[0] += this.hashPart(span);
                    em = '';
                } else {
                    text_stack[0] += token;
                }
            } else {
                token_stack.unshift(token);
                text_stack.unshift('');
                em = token;
            }
        }
    }
    return text_stack[0];
};
/**
 * Convert Markdown block quotes (lines starting with ">") into hashed
 * <blockquote> elements.
 *
 * The quoted text has one level of ">" removed, is processed recursively
 * with runBlockGamut, indented, and wrapped in <blockquote> tags.
 *
 * @param {string} text  block-level text
 * @returns {string} text with every block quote replaced by the value
 *                   hashBlock returns, surrounded by newlines
 */
Markdown_Parser.prototype.doBlockQuotes = function(text) {
    var self = this;

    var blockquote_re = new RegExp(
        '(' + // Wrap whole match in $1
            '(?:' +
                '^[ ]*>[ ]?' + // ">" at the start of a line
                '.+\\n' +      // rest of the first line
                '(.+\\n)*' +   // subsequent consecutive lines
                '\\n*' +       // blanks
            ')+' +
        ')',
        'mg'
    );

    return text.replace(blockquote_re, function(match, bq) {
        // Trim one level of quoting and blank out whitespace-only lines.
        bq = bq.replace(/^[ ]*>[ ]?|^[ ]+$/mg, '');
        bq = self.runBlockGamut(bq); // recurse

        // Indent the quoted content.
        bq = bq.replace(/^/mg, " ");
        // These leading spaces cause problem with <pre> content,
        // so we need to fix that on every line of each <pre> element:
        bq = bq.replace(/(\s*<pre>[\s\S]+?<\/pre>)/g, function(match, pre) {
            return pre.replace(/^ /mg, '');
        });

        return "\n" + self.hashBlock("<blockquote>\n" + bq + "\n</blockquote>") + "\n\n";
    });
};
/**
 * Wrap every paragraph of `text` in <p> tags and swap hashed blocks
 * back in.
 *
 * Params:
 *   text - string to process with html tags
 *
 * The text is split on runs of two or more newlines. A chunk that is
 * exactly a block hash key (B\x1A<digits>B) is replaced by its entry in
 * this.html_hashes; any other non-empty chunk is run through
 * runSpanGamut, wrapped in <p>...</p> and passed to unhash.
 */
Markdown_Parser.prototype.formParagraphs = function(text) {
    // Strip leading and trailing lines:
    text = this.__wrapSTXETX__(text);
    text = text.replace(/(?:\x02)\n+|\n+(?:\x03)/g, "");
    text = this.__unwrapSTXETX__(text);

    var grafs = text.split(/\n{2,}/m);

    //
    // Wrap <p> tags and unhashify HTML blocks
    //
    for (var i = 0; i < grafs.length; i++) {
        var value = grafs[i];
        if (value == "") {
            // [porting note]
            // This case is the replacement for PHP's PREG_SPLIT_NO_EMPTY:
            // empty chunks are left untouched.
            continue;
        }
        if (!value.match(/^B\x1A[0-9]+B$/)) {
            // Is a paragraph.
            value = this.runSpanGamut(value);
            value = value.replace(/^([ ]*)/, "<p>");
            value += "</p>";
            grafs[i] = this.unhash(value);
        }
        else {
            // Is a block: replace the hash key by the stored block.
            // [porting note] The PHP original carried a commented-out
            // branch here that re-parsed the content of
            // <div markdown="1"> blocks; it was never ported.
            grafs[i] = this.html_hashes[value];
        }
    }

    return grafs.join("\n\n");
};
/**
 * Encode text for a double-quoted HTML attribute. This function
 * is *not* suitable for attributes enclosed in single quotes.
 *
 * Ampersands and angle brackets are handled by encodeAmpsAndAngles;
 * every double quote is then replaced by the &quot; entity.
 *
 * @param {string} text  raw attribute value
 * @returns {string} encoded attribute value
 */
Markdown_Parser.prototype.encodeAttribute = function(text) {
    text = this.encodeAmpsAndAngles(text);
    text = text.replace(/"/g, '&quot;');
    return text;
};
/**
 * Smart processing for ampersands and angle brackets that need to
 * be encoded. Valid character entities are left alone unless the
 * no-entities mode is set.
 *
 * @param {string} text  text to encode
 * @returns {string} text with "&" (where required) and "<" encoded
 */
Markdown_Parser.prototype.encodeAmpsAndAngles = function(text) {
    if (this.no_entities) {
        text = text.replace(/&/g, '&amp;');
    } else {
        // Ampersand-encoding based entirely on Nat Irons's Amputator
        // MT plugin: encode every "&" that does not start an entity.
        text = text.replace(/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g, '&amp;');
    }
    // Encode remaining <'s
    text = text.replace(/</g, '&lt;');
    return text;
};

/**
 * Turn <http://example.com/> style automatic links and
 * <address@example.com> style e-mail addresses into anchors.
 *
 * NOTE(review): the header of this function was missing from the damaged
 * source; name and opening lines are reconstructed from the PHP original
 * (doAutoLinks) -- confirm against the span gamut table.
 *
 * @param {string} text  span-level text
 * @returns {string} text with every automatic link replaced by the
 *                   value hashPart returns for the generated anchor
 */
Markdown_Parser.prototype.doAutoLinks = function(text) {
    var self = this;

    // URLs:
    text = text.replace(/<((https?|ftp|dict):[^'">\s]+)>/gi, function(match, address) {
        var url = self.encodeAttribute(address);
        var link = "<a href=\"" + url + "\">" + url + "</a>";
        return self.hashPart(link);
    });

    // Email addresses:
    text = text.replace(new RegExp(
        '<' +
        '(?:mailto:)?' +
        '(' +
            '(?:' +
                '[-!#$%&\'*+/=?^_`.{|}~\\w\\x80-\\xFF]+' +
            '|' +
                '".*?"' +
            ')' +
            '\\@' +
            '(?:' +
                '[-a-z0-9\\x80-\\xFF]+(\\.[-a-z0-9\\x80-\\xFF]+)*\\.[a-z]+' +
            '|' +
                '\\[[\\d.a-fA-F:]+\\]' + // IPv4 & IPv6
            ')' +
        ')' +
        '>',
        'gi'
    ), function(match, address) {
        var link = self.encodeEmailAddress(address);
        return self.hashPart(link);
    });

    return text;
};
/**
 * Input: an email address, e.g. "foo@example.com"
 *
 * Output: the email address as a mailto link, with each character
 * of the address encoded as either a decimal or hex entity, in
 * the hopes of foiling most address harvesting spam bots. The result
 * has the shape
 *
 *   <a href="(encoded mailto:address)">(encoded address)</a>
 *
 * Which characters stay raw is decided by a pseudo-random function
 * seeded from the CRC-32 of the address, so the output is deterministic.
 *
 * Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
 * With some optimizations by Milian Wolff.
 */
Markdown_Parser.prototype.encodeEmailAddress = (function() {
    // CRC-32 lookup table (reflected polynomial 0xEDB88320), built on
    // first use and kept in this closure.
    var crc_table = null;

    function getCrcTable() {
        if (crc_table === null) {
            crc_table = [];
            for (var n = 0; n < 256; n++) {
                var c = n;
                for (var k = 0; k < 8; k++) {
                    c = (c & 1) ? (0xEDB88320 ^ (c >>> 1)) : (c >>> 1);
                }
                crc_table[n] = c >>> 0;
            }
        }
        return crc_table;
    }

    // CRC-32 of the UTF-16 code units of `str` (low byte only), as an
    // unsigned 32-bit number.
    function crc32(str) {
        var table = getCrcTable();
        var crc = -1;
        for (var i = 0; i < str.length; ++i) {
            crc = (crc >>> 8) ^ table[(crc ^ str.charCodeAt(i)) & 0xff];
        }
        return (crc ^ (-1)) >>> 0;
    }

    return function(addr) {
        addr = "mailto:" + addr;
        var chars = addr.split('');
        var seed = Math.floor(Math.abs(crc32(addr) / addr.length)); // Deterministic seed.

        for (var i = 0; i < chars.length; i++) {
            var c = chars[i];
            var ord = c.charCodeAt(0);
            // Ignore non-ascii chars.
            if (ord < 128) {
                var r = (seed * (1 + i)) % 100; // Pseudo-random function.
                // roughly 10% raw, 45% hex, 45% dec
                // '@' *must* be encoded. I insist.
                if (r > 90 && c != '@') { /* leave the character raw */ }
                else if (r < 45) { chars[i] = '&#x' + ord.toString(16) + ';'; }
                else { chars[i] = '&#' + ord.toString(10) + ';'; }
            }
        }

        addr = chars.join('');
        var text = chars.slice(7).join(''); // text without `mailto:`
        return "<a href=\"" + addr + "\">" + text + "</a>";
    };
})();
/**
 * Take the string `str` and parse it into tokens, hashing embedded HTML,
 * escaped characters and handling code spans.
 *
 * Tokens are: a backslash escape, a run of backticks (code span marker)
 * and -- unless this.no_markup is set -- HTML comments, processing
 * instructions and regular tags. Each token is passed to handleSpanToken
 * together with the text that follows it.
 *
 * NOTE(review): part of the expression was missing from the damaged
 * source; the code-span marker, the no_markup switch and the HTML
 * comment alternative are reconstructed from the PHP original.
 *
 * @param {string} str  span-level text
 * @returns {string} text with every token replaced
 */
Markdown_Parser.prototype.parseSpan = function(str) {
    var output = '';

    var span_re = new RegExp(
        '(' +
            '\\\\' + this.escape_chars_re +
        '|' +
            // PHP guards this with the lookbehind (?<![`\\]), which this
            // port avoids; the loop below checks for a preceding backslash
            // by hand instead.
            '`+' + // code span marker
        (this.no_markup ? '' : (
        '|' +
            '<!--.*?-->' + // comment
        '|' +
            '<\\?.*?\\?>|<%.*?%>' + // processing instruction
        '|' +
            '<[/!$]?[-a-zA-Z0-9:_]+' + // regular tags
            '(?:' +
                '\\s' +
                // One character or one whole quoted string per step, so
                // there is no nested quantifier to backtrack through.
                '(?:[^"\'>]|"[^"]*"|\'[^\']*\')*' +
            ')?' +
            '>'
        )) +
        ')'
    );

    while (true) {
        //
        // Each loop iteration searches for either the next tag, the next
        // opening code span marker, or the next escaped character.
        // Each token is then passed to handleSpanToken.
        //
        var parts = str.match(span_re);
        if (!parts) {
            output += str;
            break;
        }

        var token = parts[0];
        var before = str.slice(0, parts.index);
        var after = str.slice(parts.index + token.length);

        output += before;

        // Back quote but after backslash is to be ignored.
        if (token.charAt(0) == "`" && before.charAt(before.length - 1) == "\\") {
            output += token;
            str = after;
            continue;
        }

        var r = this.handleSpanToken(token, after);
        output += r[0];
        str = r[1];
    }

    return output;
};
/**
 * Handle `token` provided by parseSpan by determining its nature and
 * returning the corresponding value that should replace it.
 *
 * @param {string} token  the token found by parseSpan
 * @param {string} str    the text following the token
 * @returns {Array} [replacement for the token, remaining text]
 */
Markdown_Parser.prototype.handleSpanToken = function(token, str) {
    switch (token.charAt(0)) {
        case "\\":
            // Backslash escape: emit the escaped character as a decimal
            // character reference.
            return [this.hashPart("&#" + token.charCodeAt(1) + ";"), str];
        case "`":
            // Search for end marker in remaining text: the same run of
            // backticks, preceded by a non-backtick and not followed by
            // another backtick.
            var found = str.match(new RegExp(
                '^([\\s\\S]*?[^`])' + this._php_preg_quote(token) + '(?!`)([\\s\\S]*)$',
                'm'
            ));
            if (found) {
                var codespan = this.makeCodeSpan(found[1]);
                return [this.hashPart(codespan), found[2]];
            }
            return [token, str]; // return as text since no ending marker found.
        default:
            return [this.hashPart(token), str];
    }
};
/**
 * Strip one indentation level from the start of every line: either a
 * single tab or between one and this.tab_width spaces.
 */
Markdown_Parser.prototype.outdent = function(text) {
    var indentPattern = '^(\\t|[ ]{1,' + this.tab_width + '})';
    var indentRe = new RegExp(indentPattern, 'mg');
    return text.replace(indentRe, '');
};
//# String length function for detab. `_initDetab` will create a function to
//# handle UTF-8 if the default function does not exist.
//var $utf8_strlen = 'mb_strlen';
/**
 * Expand tab characters into spaces so that text after each tab starts
 * on the next multiple of this.tab_width.
 */
Markdown_Parser.prototype.detab = function(text) {
    var parser = this;

    // Rebuild one line: the pieces between tabs are glued back together
    // with as many spaces as are needed to reach the next tab stop.
    function expandLine(line) {
        var segments = line.split("\t");
        var rebuilt = segments[0];
        for (var n = 1; n < segments.length; n++) {
            var padding = parser.tab_width - rebuilt.length % parser.tab_width;
            rebuilt += parser._php_str_repeat(" ", padding) + segments[n];
        }
        return rebuilt;
    }

    // Only lines that actually contain a tab are touched.
    return text.replace(/^.*\t.*$/mg, expandLine);
};
/**
 * Replace every hash key found in `text` (a boundary character, \x1A,
 * digits, and the same boundary character again) by the entry stored
 * under that key in this.html_hashes.
 */
Markdown_Parser.prototype.unhash = function(text) {
    var parser = this;
    var hashKeyRe = /(.)\x1A[0-9]+\1/g;

    function lookup(key) {
        return parser.html_hashes[key];
    }

    return text.replace(hashKeyRe, function(key) {
        return lookup(key);
    });
};
/*-------------------------------------------------------------------------*/
/**
 * Constructor for the Markdown Extra parser. Sets the Extra-specific
 * defaults and registers the additional transformation steps on the
 * gamut lists.
 */
function MarkdownExtra_Parser() {
    // --- Footnotes ---
    // Prefix for footnote ids.
    this.fn_id_prefix = "";
    // Optional title attribute for footnote links and backlinks.
    this.fn_link_title = MARKDOWN_FN_LINK_TITLE;
    this.fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
    // Optional class attribute for footnote links and backlinks.
    this.fn_link_class = MARKDOWN_FN_LINK_CLASS;
    this.fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;

    // --- Abbreviations ---
    // Predefined abbreviations.
    this.predef_abbr = {};

    // --- Working state used while transforming a document ---
    this.footnotes = {};
    this.footnotes_ordered = [];
    this.abbr_desciptions = {};
    this.abbr_word_re = '';
    // Number that the next footnote will receive.
    this.footnote_counter = 1;

    // --- HTML block parser ---
    // Tags that are always treated as block tags:
    this.block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
    // Tags treated as block tags only if the opening tag is alone on its line:
    this.context_block_tags_re = 'script|noscript|math|ins|del';
    // Tags where markdown="1" defaults to span mode:
    this.contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
    // Tags whose contents must never be modified, wherever they appear:
    this.clean_tags_re = 'script|math';
    // Tags that do not need to be closed.
    this.auto_close_tags_re = 'hr|img';

    // --- Emphasis ---
    // Emphasis markers are redefined so that emphasis by underscore does
    // not work in the middle of a word. Each entry is
    // [marker currently open, pattern source].
    this.em_relist = [
        ['' , '(?:(^|[^\\*])(\\*)(?=[^\\*])|(^|[^a-zA-Z0-9_])(_)(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'],
        ['*', '((?:\\S|^)[^\\*])(\\*)(?!\\*)'],
        ['_', '((?:\\S|^)[^_])(_)(?![a-zA-Z0-9_])']
    ];
    this.strong_relist = [
        ['' , '(?:(^|[^\\*])(\\*\\*)(?=[^\\*])|(^|[^a-zA-Z0-9_])(__)(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'],
        ['**', '((?:\\S|^)[^\\*])(\\*\\*)(?!\\*)'],
        ['__', '((?:\\S|^)[^_])(__)(?![a-zA-Z0-9_])']
    ];
    this.em_strong_relist = [
        ['' , '(?:(^|[^\\*])(\\*\\*\\*)(?=[^\\*])|(^|[^a-zA-Z0-9_])(___)(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'],
        ['***', '((?:\\S|^)[^\\*])(\\*\\*\\*)(?!\\*)'],
        ['___', '((?:\\S|^)[^_])(___)(?![a-zA-Z0-9_])']
    ];

    // Extra escapable characters.
    this.escape_chars = this.escape_chars + ':|';

    // Extra document, block, and span transformations, each registered
    // as [method name, priority]. (The original comment states that the
    // parent constructor does the sorting.)
    this.document_gamut.push(
        ['doFencedCodeBlocks', 5],
        ['stripFootnotes', 15],
        ['stripAbbreviations', 25],
        ['appendFootnotes', 50]
    );
    this.block_gamut.push(
        ['doFencedCodeBlocks', 5],
        ['doTables', 15],
        ['doDefLists', 45]
    );
    this.span_gamut.push(
        ['doFootnotes', 5],
        ['doAbbreviations', 70]
    );
}
MarkdownExtra_Parser.prototype = new Markdown_Parser();
/**
 * Prepare the Extra-specific state for a new run: call the inherited
 * setup, reset footnote and abbreviation bookkeeping, then load the
 * predefined abbreviations.
 */
MarkdownExtra_Parser.prototype.setup = function() {
    this.constructor.prototype.setup.call(this);

    this.footnotes = {};
    this.footnotes_ordered = [];
    this.abbr_desciptions = {};
    this.abbr_word_re = '';
    this.footnote_counter = 1;

    var predefined = this.predef_abbr;
    for (var word in predefined) {
        // abbr_word_re is a "|"-separated alternation of quoted words.
        var separator = (this.abbr_word_re != '') ? '|' : '';
        this.abbr_word_re = this.abbr_word_re + separator + this._php_preg_quote(word);
        this.abbr_desciptions[word] = this._php_trim(predefined[word]);
    }
};
/**
 * Drop the Extra-specific state collected during a run, then call the
 * inherited teardown.
 */
MarkdownExtra_Parser.prototype.teardown = function() {
    // Footnote bookkeeping.
    this.footnotes = {};
    this.footnotes_ordered = [];

    // Abbreviation bookkeeping.
    this.abbr_desciptions = {};
    this.abbr_word_re = '';

    var inherited = this.constructor.prototype;
    inherited.teardown.call(this);
};
/**
 * Hashify HTML blocks and "clean tags".
 *
 * Only block-level HTML tags such as headers, lists and tables are meant
 * to be hashed here, because "paragraphs" that are wrapped in
 * non-block-level tags (anchors, phrase emphasis, spans) must still get
 * <p> tags around them. The list of tags is hard-coded.
 *
 * The work is done by _hashHTMLBlocks_inMarkdown, which calls
 * _hashHTMLBlocks_inHTML when it encounters block tags. According to the
 * original description, when a markdown="1" attribute is found within a
 * tag, _hashHTMLBlocks_inHTML calls back _hashHTMLBlocks_inMarkdown to
 * handle the Markdown syntax inside that tag, so the two functions are
 * mutually recursive.
 */
MarkdownExtra_Parser.prototype.hashHTMLBlocks = function(text) {
    // The hasher returns [processed text, remaining text]; only the
    // processed part is handed back to the caller.
    return this._hashHTMLBlocks_inMarkdown(text)[0];
};
/**
 * Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
 *
 * *   `indent` is the number of spaces to be ignored when checking for
 *     code blocks. This is important because if we don't take the indent
 *     into account, something like this (which looks right) won't work as
 *     expected:
 *
 *     <div>
 *         <div markdown="1">
 *         Hello World.  <-- Is this a Markdown code block or text?
 *         </div>  <-- Is this a Markdown code block or a real tag?
 *     <div>
 *
 *     If you don't like this, just don't indent the tag on which
 *     you apply the markdown="1" attribute.
 *
 * *   If `enclosing_tag_re` is not empty, stops at the first unmatched
 *     closing tag with that name. Nested tags supported.
 *
 * *   If `span` is true, text inside must be treated as span. An empty
 *     span-level hash is then inserted around every newline so that the
 *     text cannot trigger any block element.
 *
 * NOTE(review): several pattern fragments ("</?", the HTML comment and
 * CDATA patterns) were missing from the damaged source and are restored
 * here from the comments next to them and the PHP original.
 *
 * Returns an array of that form: [ processed text , remaining text ]
 */
MarkdownExtra_Parser.prototype._hashHTMLBlocks_inMarkdown = function(text, indent, enclosing_tag_re, span) {
    if ('undefined' === typeof indent) { indent = 0; }
    if ('undefined' === typeof enclosing_tag_re) { enclosing_tag_re = ''; }
    if ('undefined' === typeof span) { span = false; }

    if (text === '') { return ['', '']; }

    var matches;

    // Regex to check for the presence of newlines around a block tag.
    var newline_before_re = /(?:^\n?|\n\n)*$/;
    // Anchored at the start of the text following the tag (no "m" flag,
    // otherwise "^" would also match at the start of any later line).
    var newline_after_re = new RegExp(
        '^' +                    // Start of text following the tag.
        '([ ]*<!--.*?-->)?' +    // Optional comment.
        '[ ]*\\n'                // Must be followed by newline.
    );

    // Regex to match any tag.
    var block_tag_re = new RegExp(
        '(' +                    // $1: Capture whole tag.
            '</?' +              // Any opening or closing tag.
            '(' +                // $2: Tag name.
                this.block_tags_re + '|' +
                this.context_block_tags_re + '|' +
                this.clean_tags_re + '|' +
                '(?!\\s)' + enclosing_tag_re +
            ')' +
            '(?:' +
                '(?=[\\s"\'/a-zA-Z0-9])' + // Allowed characters after tag name.
                // Each step consumes one quoted string or one character;
                // lookahead + backreference keeps the step atomic.
                '(?:' +
                    '(?=(' +
                        '".*?"|' +      // Double quotes (can contain `>`)
                        '\'.*?\'|' +    // Single quotes (can contain `>`)
                        '.+?' +         // Anything but quotes and `>`.
                    '))\\3' +
                ')*?' +
            ')?' +
            '>' +                // End of tag.
        '|' +
            '<!--.*?-->' +       // HTML Comment
        '|' +
            '<\\?.*?\\?>|<%.*?%>' + // Processing instruction
        '|' +
            '<!\\[CDATA\\[.*?\\]\\]>' + // CData Block
        '|' +
            // Code span marker
            '`+' +
        ( !span ? // If not in span.
        '|' +
            // Indented code block
            '(?:^[ ]*\\n|^|\\n[ ]*\\n)' +
            '[ ]{' + (indent + 4) + '}[^\\n]*\\n' +
            '(?=' +
                '(?:[ ]{' + (indent + 4) + '}[^\\n]*|[ ]*)\\n' +
            ')*' +
        '|' +
            // Fenced code block marker
            '(?:^|\\n)' +
            '[ ]{0,' + indent + '}~~~+[ ]*\\n'
        : '' ) + // End (if not in span).
        ')',
        'm'
    );

    var depth = 0;   // Current depth inside the tag tree.
    var parsed = ""; // Parsed text that will be returned.

    //
    // Loop through every tag until we find the closing tag of the parent
    // or loop until reaching the end of text if no parent tag specified.
    //
    do {
        //
        // Split the text around the first tag found: text before it,
        // the tag itself, and the text after it.
        //
        var found = text.match(block_tag_re);
        var before = found ? text.slice(0, found.index) : text;

        // If in Markdown span mode, add an empty-string span-level hash
        // after each newline to prevent triggering any block element.
        if (span) {
            var _void = this.hashPart("", ':');
            var newline = _void + "\n";
            before = _void + before.replace(/\n/g, newline) + _void;
        }

        parsed += before; // Text before current tag.

        // If end of text has been reached. Stop loop.
        if (!found) {
            text = "";
            break;
        }

        var tag = found[0];                                 // Tag to handle.
        text = text.slice(found.index + tag.length);        // Remaining text after current tag.

        var tag_re; // Quoted tag, for use in a regular expression.
        var t;
        var block_text;

        //
        // Check for: Code span marker
        //
        if (tag.charAt(0) == "`") {
            // Find corresponding end marker: the same run of backticks,
            // preceded by a non-backtick, not followed by a backtick, and
            // not beyond a blank line.
            tag_re = this._php_preg_quote(tag);
            if (matches = text.match(new RegExp('^(?:[^\\n]|\\n(?!\\n))*?[^`]' + tag_re + '(?!`)'))) {
                // End marker found: pass text unchanged until marker.
                parsed += tag + matches[0];
                text = text.substr(matches[0].length);
            }
            else {
                // Unmatched marker: just skip it.
                parsed += tag;
            }
        }
        //
        // Check for: Fenced code block marker.
        //
        else if (tag.match(new RegExp('^\\n?[ ]{0,' + (indent + 3) + '}~'))) {
            // Fenced code block marker: find matching end marker.
            tag_re = this._php_preg_quote(this._php_trim(tag));
            if (matches = text.match(new RegExp('^(?:.*\\n)+?[ ]{0,' + indent + '}' + tag_re + '[ ]*\\n'))) {
                // End marker found: pass text unchanged until marker.
                parsed += tag + matches[0];
                text = text.substr(matches[0].length);
            }
            else {
                // No end marker: just skip it.
                parsed += tag;
            }
        }
        //
        // Check for: Indented code block.
        //
        else if (tag.charAt(0) == "\n" || tag.charAt(0) == " ") {
            // Indented code block: pass it unchanged, will be handled
            // later.
            parsed += tag;
        }
        //
        // Check for: Opening Block level tag or
        //            Opening Context Block tag (like ins and del)
        //               used as a block tag (tag is alone on its line).
        //
        else if (tag.match(new RegExp('^<(?:' + this.block_tags_re + ')\\b')) ||
            (
                tag.match(new RegExp('^<(?:' + this.context_block_tags_re + ')\\b')) &&
                parsed.match(newline_before_re) &&
                text.match(newline_after_re)
            )
        ) {
            // Need to parse tag and following text using the HTML parser.
            t = this._hashHTMLBlocks_inHTML(tag + text, this.hashBlock, true);
            block_text = t[0];
            text = t[1];

            // Make sure it stays outside of any paragraph by adding newlines.
            parsed += "\n\n" + block_text + "\n\n";
        }
        //
        // Check for: Clean tag (like script, math)
        //            HTML Comments, processing instructions.
        //
        else if (
            tag.match(new RegExp('^<(?:' + this.clean_tags_re + ')\\b')) ||
            tag.charAt(1) == '!' || tag.charAt(1) == '?'
        ) {
            // Need to parse tag and following text using the HTML parser.
            // (don't check for markdown attribute)
            t = this._hashHTMLBlocks_inHTML(tag + text, this.hashClean, false);
            block_text = t[0];
            text = t[1];

            parsed += block_text;
        }
        //
        // Check for: Tag with same name as enclosing tag.
        //
        else if (enclosing_tag_re !== '' &&
            // Same name as enclosing tag.
            tag.match(new RegExp('^</?(?:' + enclosing_tag_re + ')\\b'))
        ) {
            //
            // Increase/decrease nested tag count. Self-closing tags
            // ("<x/>") leave the depth unchanged.
            //
            if (tag.charAt(1) == '/') depth--;
            else if (tag.charAt(tag.length - 2) != '/') depth++;

            if (depth < 0) {
                //
                // Going out of parent element. Clean up and break so we
                // return to the calling function.
                //
                text = tag + text;
                break;
            }

            parsed += tag;
        }
        else {
            parsed += tag;
        }
    } while (depth >= 0);

    return [parsed, text];
};
/**
* Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
*
* * Calls $hash_method to convert any blocks.
* * Stops when the first opening tag closes.
* * $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
* (it is not inside clean tags)
*
* Returns an array of that form: ( processed text , remaining text )
*/
MarkdownExtra_Parser.prototype._hashHTMLBlocks_inHTML = function(text, hash_method, md_attr) {
    // Walks `text` tag by tag, starting from the tag at its head, and stops
    // once that tag has been balanced by its closing tag.
    //
    //   text        - string to scan.
    //   hash_method - function invoked with `this` bound to the parser on
    //                 every finished chunk of raw HTML; its return value is
    //                 appended to the processed text.
    //   md_attr     - when truthy, a `markdown` attribute whose value matches
    //                 1/block/span is removed from its tag and the tag content
    //                 is handed to _hashHTMLBlocks_inMarkdown.
    //
    // Returns [processed text, remaining text]. When no further tag can be
    // found while tags are still open, returns [first character, rest] of
    // the original input (see the comment at the early return below).
    if(text === '') return ['', ''];

    var matches;

    // Regex to match `markdown` attribute inside of a tag.
    var markdown_attr_re = new RegExp(
        '\\s*' +            // Eat whitespace before the `markdown` attribute
        'markdown' +
        '\\s*=\\s*' +
        '(?:' +
            '(["\'])' +     // $1: quote delimiter
            '(.*?)' +       // $2: attribute value
            '\\1' +         // matching delimiter
        '|' +
            '([^\\s>]*)' +  // $3: unquoted attribute value
        ')' +
        '()'                // $4: empty group kept from the original pattern
    );

    // Regex to match any tag.
    var tag_re = new RegExp(
        '(' +                                   // $1: Capture whole tag.
            '</?' +                             // Any opening or closing tag.
                '[\\w:$]+' +                    // Tag name.
                '(?:' +
                    '(?=[\\s"\'/a-zA-Z0-9])' +  // Allowed characters after tag name.
                    '(?:' +
                        '(?=(' +                // $2: matched inside a lookahead...
                            '".*?"|' +          // Double quotes (can contain `>`)
                            '\'.*?\'|' +        // Single quotes (can contain `>`)
                            '.+?' +             // Anything but quotes and `>`.
                        '))\\2' +               // ...then consumed as a unit.
                    ')*?' +
                ')?' +
            '>' +                               // End of tag.
        '|' +
            '<!--.*?-->' +                      // HTML Comment
        '|' +
            '<\\?.*?\\?>|<%.*?%>' +             // Processing instruction
        '|' +
            '<!\\[CDATA\\[.*?\\]\\]>' +         // CData Block
        ')'
    );

    var original_text = text;   // Save original text in case of failure.
    var depth = 0;              // Current depth inside the tag tree.
    var block_text = "";        // Temporary text holder for current text.
    var parsed = "";            // Parsed text that will be returned.

    //
    // Get the name of the starting tag.
    // (The character class keeps the name usable inside a RegExp source.)
    //
    var base_tag_name_re = "";
    if(matches = text.match(/^<([\w:$]*)\b/)) {
        base_tag_name_re = matches[1];
    }

    // Both patterns are loop-invariant, so build them once.
    var auto_close_re = new RegExp('^</?(?:' + this.auto_close_tags_re + ')\\b');
    var base_tag_re = new RegExp('^</?' + base_tag_name_re + '\\b');

    //
    // Loop through every tag until we find the corresponding closing tag.
    //
    do {
        //
        // Locate the next tag: text before it is appended to the current
        // block, text after it is what remains to be scanned.
        //
        var tag_match = text.match(tag_re);

        if(!tag_match) {
            //
            // End of text reached with unbalanced tag(s).
            // In that case, we return original text unchanged and pass the
            // first character as filtered to prevent an infinite loop in the
            // parent function.
            //
            return [original_text.charAt(0), original_text.substr(1)];
        }

        var tag = tag_match[0];                             // Tag to handle.
        block_text += text.substr(0, tag_match.index);      // Text before current tag.
        text = text.substr(tag_match.index + tag.length);   // Remaining text after current tag.

        //
        // Check for: Auto-close tag (like <hr />)
        //            Comments and Processing Instructions.
        //
        if(auto_close_re.test(tag) ||
            tag.charAt(1) === '!' || tag.charAt(1) === '?')
        {
            // Just add the tag to the block as if it was text.
            block_text += tag;
            continue;
        }

        //
        // Increase/decrease nested tag count. Only do so if
        // the tag's name match base tag's.
        //
        if(base_tag_re.test(tag)) {
            if(tag.charAt(1) === '/') { depth--; }
            else if(tag.charAt(tag.length - 2) !== '/') { depth++; }
        }

        //
        // Check for `markdown="1"` attribute and handle it.
        //
        var attr_m = md_attr ? tag.match(markdown_attr_re) : null;
        // Only one of $2/$3 participates in a match; the other is undefined
        // in JavaScript and must not be concatenated as "undefined".
        var attr_value = attr_m ? (attr_m[2] || '') + (attr_m[3] || '') : '';

        if(!attr_m || !/^1|block|span$/.test(attr_value)) {
            block_text += tag;
            continue;
        }

        // Remove `markdown` attribute from opening tag.
        tag = tag.replace(markdown_attr_re, '');

        // Check if text inside this tag must be parsed in span mode.
        this.mode = attr_value;
        var span_mode = this.mode == 'span' || this.mode != 'block' &&
            tag.match(new RegExp('^<(?:' + this.contain_span_tags_re + ')\\b'));

        // Calculate indent before tag.
        var indent = 0;
        if(matches = block_text.match(/(?:^|\n)( *?)(?! ).*?$/)) {
            indent = matches[1].length;
        }

        // End preceding block with this tag.
        block_text += tag;
        parsed += hash_method.call(this, block_text);

        // Get enclosing tag name for the ParseMarkdown function.
        matches = tag.match(/^<([\w:$]*)\b/);
        var tag_name_re = matches ? matches[1] : '';

        // Parse the content using the HTML-in-Markdown parser.
        var t = this._hashHTMLBlocks_inMarkdown(text, indent, tag_name_re, span_mode);
        block_text = t[0];
        text = t[1];

        // Outdent markdown text: strip up to `indent` leading spaces from
        // every line.
        if(indent > 0) {
            block_text = block_text.replace(new RegExp('^[ ]{1,' + indent + '}', 'mg'), "");
        }

        // Append tag content to parsed text.
        if(!span_mode) { parsed += "\n\n" + block_text + "\n\n"; }
        else { parsed += block_text; }

        // Start over a new block.
        block_text = "";
    } while(depth > 0);

    //
    // Hash last block text that wasn't processed inside the loop.
    //
    parsed += hash_method.call(this, block_text);

    return [parsed, text];
};
/**
 * Hash a "clean" tag.
 *
 * Delegates to hashPart, passing 'C' as its second argument, and returns
 * whatever hashPart returns.
 */
MarkdownExtra_Parser.prototype.hashClean = function(text) {
    var clean_marker = 'C';
    var hashed = this.hashPart(text, clean_marker);
    return hashed;
};
/**
 * Redefined to add id attribute support.
 *
 * Converts Setext-style and atx-style headers in `text` into <h1>..<h6>
 * elements. A trailing `{#some-id}` on the header line becomes the
 * element's id attribute. Header text goes through runSpanGamut and the
 * finished element through hashBlock.
 *
 * Returns the text with every header replaced.
 */
MarkdownExtra_Parser.prototype.doHeaders = function(text) {
    var self = this;

    // Builds the optional ` id="..."` attribute of a header tag.
    function _doHeaders_attr(attr) {
        if('undefined' === typeof attr || attr == "") { return ""; }
        return " id=\"" + attr + "\"";
    }

    // Builds, hashes and pads one header element.
    function _doHeaders_block(level, id, span) {
        var block = "<h" + level + _doHeaders_attr(id) + ">" +
            self.runSpanGamut(span) +
            "</h" + level + ">";
        return "\n" + self.hashBlock(block) + "\n\n";
    }

    // Setext-style headers:
    //    Header 1  {#header1}
    //    ========
    //
    //    Header 2  {#header2}
    //    --------
    text = text.replace(new RegExp(
        '(^.+?)' +                                  // $1: Header text
        '(?:[ ]+\\{\\#([-_:a-zA-Z0-9]+)\\})?' +     // $2: Id attribute
        '[ ]*\\n(=+|-+)[ ]*\\n+',                   // $3: Header footer
        'mg'
    ), function(match, span, id, line) {
        // A single dash under a "- item" line is left untouched.
        if(line == '-' && span.match(/^- /)) {
            return match;
        }
        var level = line.charAt(0) == '=' ? 1 : 2;
        return _doHeaders_block(level, id, span);
    });

    // atx-style headers:
    //    # Header 1        {#header1}
    //    ## Header 2       {#header2}
    //    ## Header 2 with closing hashes ##  {#header3}
    //    ...
    //    ###### Header 6   {#header2}
    text = text.replace(new RegExp(
        '^(\\#{1,6})' +                             // $1 = string of #\'s
        '[ ]*' +
        '(.+?)' +                                   // $2 = Header text
        '[ ]*' +
        '\\#*' +                                    // optional closing #\'s (not counted)
        '(?:[ ]+\\{\\#([-_:a-zA-Z0-9]+)\\})?' +     // id attribute
        '\\n+',
        'mg'
    ), function(match, hashes, span, id) {
        // The number of leading hashes is the header level.
        return _doHeaders_block(hashes.length, id, span);
    });

    return text;
};
/**
 * Form HTML tables.
 *
 * Recognizes tables written with or without a leading pipe on each line
 * and replaces each one with <table> markup passed through hashBlock.
 * Column alignment is read from the colons in the header underline.
 *
 * Returns the text with every table replaced.
 */
MarkdownExtra_Parser.prototype.doTables = function(text) {
    var self = this;
    var less_than_tab = this.tab_width - 1;

    // Builds the markup for one matched table.
    //   head      - header row, without leading pipe
    //   underline - header underline, without leading pipe
    //   content   - all body rows, without leading pipes
    var _doTable_callback = function(match, head, underline, content) {
        // Remove any trailing pipes for each line (every line, hence `g`).
        head = head.replace(/[|] *$/gm, '');
        underline = underline.replace(/[|] *$/gm, '');
        content = content.replace(/[|] *$/gm, '');

        // Reading alignment from header underline.
        var attr = [];
        var separators = underline.split(/[ ]*[|][ ]*/);
        var n;
        for(n = 0; n < separators.length; n++) {
            var s = separators[n];
            if (s.match(/^ *-+: *$/))       { attr[n] = ' align="right"'; }
            else if (s.match(/^ *:-+: *$/)) { attr[n] = ' align="center"'; }
            else if (s.match(/^ *:-+ *$/))  { attr[n] = ' align="left"'; }
            else                            { attr[n] = ''; }
        }

        // Parsing span elements, including code spans, character escapes,
        // and inline HTML tags, so that pipes inside those gets ignored.
        head = self.parseSpan(head);
        var headers = head.split(/ *[|] */);
        var col_count = headers.length;

        // Write column headers. `attr[n]` may be missing when the header
        // row has more cells than the underline; treat that as no attribute.
        var text = "<table>\n";
        text += "<thead>\n";
        text += "<tr>\n";
        for(n = 0; n < headers.length; n++) {
            text += "  <th" + (attr[n] || '') + ">" +
                self.runSpanGamut(self._php_trim(headers[n])) + "</th>\n";
        }
        text += "</tr>\n";
        text += "</thead>\n";

        // Split content by row.
        var rows = self._php_trim(content, "\n").split("\n");

        text += "<tbody>\n";
        for(var i = 0; i < rows.length; i++) {
            // Parsing span elements, including code spans, character escapes,
            // and inline HTML tags, so that pipes inside those gets ignored.
            var row = self.parseSpan(rows[i]);

            // Split row by cell, padding short rows with empty cells.
            var row_cells = row.split(/ *[|] */, col_count);
            while(row_cells.length < col_count) { row_cells.push(''); }

            text += "<tr>\n";
            for(n = 0; n < row_cells.length; n++) {
                text += "  <td" + (attr[n] || '') + ">" +
                    self.runSpanGamut(self._php_trim(row_cells[n])) + "</td>\n";
            }
            text += "</tr>\n";
        }
        text += "</tbody>\n";
        text += "</table>";

        return self.hashBlock(text) + "\n";
    };

    text = this.__wrapSTXETX__(text);

    //
    // Find tables with leading pipe.
    //
    //    | Header 1 | Header 2
    //    | -------- | --------
    //    | Cell 1   | Cell 2
    //    | Cell 3   | Cell 4
    //
    text = text.replace(new RegExp(
        '^' +                                   // Start of a line
        '[ ]{0,' + less_than_tab + '}' +        // Allowed whitespace.
        '[|]' +                                 // Optional leading pipe (present)
        '(.+)\\n' +                             // $1: Header row (at least one pipe)

        '[ ]{0,' + less_than_tab + '}' +        // Allowed whitespace.
        '[|]([ ]*[-:]+[-| :]*)\\n' +            // $2: Header underline

        '(' +                                   // $3: Cells
            '(?:' +
                '[ ]*' +                        // Allowed whitespace.
                '[|].*\\n' +                    // Row content.
            ')*' +
        ')' +
        '(?=\\n|\\x03)',                        // Stop at final double newline.
        'mg'
    ), function(match, head, underline, content) {
        // Remove leading pipe for each row (every row, hence `g`).
        content = content.replace(/^ *[|]/gm, '');
        return _doTable_callback(match, head, underline, content);
    });

    //
    // Find tables without leading pipe.
    //
    //    Header 1 | Header 2
    //    -------- | --------
    //    Cell 1   | Cell 2
    //    Cell 3   | Cell 4
    //
    text = text.replace(new RegExp(
        '^' +                                   // Start of a line
        '[ ]{0,' + less_than_tab + '}' +        // Allowed whitespace.
        '(\\S.*[|].*)\\n' +                     // $1: Header row (at least one pipe)

        '[ ]{0,' + less_than_tab + '}' +        // Allowed whitespace.
        '([-:]+[ ]*[|][-| :]*)\\n' +            // $2: Header underline

        '(' +                                   // $3: Cells
            '(?:' +
                '.*[|].*\\n' +                  // Row content
            ')*' +
        ')' +
        '(?=\\n|\\x03)',                        // Stop at final double newline.
        'mg'
    ), _doTable_callback);

    text = this.__unwrapSTXETX__(text);

    return text;
};
/**
 * Form HTML definition lists.
 *
 * Finds each whole definition list (one or more term lines followed by a
 * line starting with a colon), converts its items with
 * processDefListItems, wraps the result in <dl>...</dl> and hashes it
 * with hashBlock.
 *
 * Returns the text with every definition list replaced.
 */
MarkdownExtra_Parser.prototype.doDefLists = function(text) {
    var self = this;
    var less_than_tab = this.tab_width - 1;

    // Re-usable pattern to match any entire dl list:
    var whole_list_re = '(?:' +
        '(' +                               // $1 = whole list
            '(' +                           // $2
                '[ ]{0,' + less_than_tab + '}' +
                '((?:[ \\t]*\\S.*\\n)+)' +  // $3 = defined term
                                            // [porting note] Original regex from PHP is
                                            // (?>.*\S.*\n), which matches a line with at
                                            // least one non-space character. Change the
                                            // first .* to [ \t]* stops unneccessary
                                            // backtracking hence improves performance
                '\\n?' +
                '[ ]{0,' + less_than_tab + '}:[ ]+' + // colon starting definition
            ')' +
            '([\\s\\S]+?)' +
            '(' +                           // $4
                '(?=\\x03)' +               // \z (end-of-text marker added by __wrapSTXETX__)
            '|' +
                '(?=' +                     // [porting note] Our regex will consume leading
                                            // newline characters so we will leave the newlines
                                            // here for the next definition
                    '\\n{2,}' +
                    '(?=\\S)' +
                    '(?!' +                 // Negative lookahead for another term
                        '[ ]{0,' + less_than_tab + '}' +
                        '(?:\\S.*\\n)+?' +  // defined term
                        '\\n?' +
                        '[ ]{0,' + less_than_tab + '}:[ ]+' + // colon starting definition
                    ')' +
                    '(?!' +                 // Negative lookahead for another definition
                        '[ ]{0,' + less_than_tab + '}:[ ]+' + // colon starting definition
                    ')' +
                ')' +
            ')' +
        ')' +
    ')'; // mx

    text = this.__wrapSTXETX__(text);
    text = text.replace(new RegExp(
        '(\\x02\\n?|\\n\\n)' +
        whole_list_re, 'mg'
    ), function(match, pre, list) {
        // [porting note] `pre` holds the leading \x02 / blank line consumed by
        // the pattern; it is put back in front of the list so it is preserved.
        var result = self._php_trim(self.processDefListItems(list));
        result = "<dl>\n" + result + "\n</dl>";
        return pre + self.hashBlock(result) + "\n\n";
    });
    text = this.__unwrapSTXETX__(text);

    return text;
};
/**
 * Process the contents of a single definition list, splitting it
 * into individual term and definition list items.
 *
 * Every term line becomes a <dt> element and every definition a <dd>
 * element. A definition preceded by a blank line, or containing one, is
 * run through runBlockGamut; any other definition goes through
 * runSpanGamut.
 *
 * Returns the converted list body (without the surrounding list element).
 */
MarkdownExtra_Parser.prototype.processDefListItems = function(list_str) {
    var self = this;
    var less_than_tab = this.tab_width - 1;

    list_str = this.__wrapSTXETX__(list_str);

    // trim trailing blank lines:
    list_str = list_str.replace(/\n{2,}(?=\x03)/, "\n");

    // Process definition terms.
    list_str = list_str.replace(new RegExp(
        '(\\x02\\n?|\\n\\n+)' +                 // leading line
        '(' +                                   // definition terms = $2
            '[ ]{0,' + less_than_tab + '}' +    // leading whitespace
            '(?![:][ ]|[ ])' +                  // negative lookahead for a definition
                                                //   mark (colon) or more whitespace.
            '(?:\\S.*\\n)+?' +                  // actual term (not whitespace).
        ')' +
        '(?=\\n?[ ]{0,3}:[ ])',                 // lookahead for following line feed
                                                //   with a definition mark.
        'mg'
    ), function(match, pre, terms_str) {
        var terms = self._php_trim(terms_str).split("\n");
        var text = '';
        for (var i = 0; i < terms.length; i++) {
            var term = self.runSpanGamut(self._php_trim(terms[i]));
            text += "\n<dt>" + term + "</dt>";
        }
        return text + "\n";
    });

    // Process actual definitions.
    list_str = list_str.replace(new RegExp(
        '\\n(\\n+)?' +                          // leading line = $1
        '(' +                                   // marker space = $2
            '[ ]{0,' + less_than_tab + '}' +    // whitespace before colon
            '[:][ ]+' +                         // definition mark (colon)
        ')' +
        '([\\s\\S]+?)' +                        // definition text = $3
                                                // [porting note] Maybe no trailing
                                                // newlines in our version, changed the
                                                // following line from \n+ to \n*.
        '(?=\\n*' +                             // stop at next definition mark,
            '(?:' +                             // next term or end of text
                '\\n[ ]{0,' + less_than_tab + '}[:][ ]|' + // [porting note] do not match
                                                // colon in the middle of a line
                '<dt>|\\x03' +                  // next term (already converted) or \z
            ')' +
        ')',
        'mg'
    ), function(match, leading_line, marker_space, def) {
        if (leading_line || def.match(/\n{2,}/)) {
            // Replace marker with the appropriate whitespace indentation
            def = self._php_str_repeat(' ', marker_space.length) + def;
            def = self.runBlockGamut(self.outdent(def + "\n\n"));
            def = "\n" + def + "\n";
        }
        else {
            def = self._php_rtrim(def);
            def = self.runSpanGamut(self.outdent(def));
        }

        return "\n<dd>" + def + "</dd>\n";
    });

    list_str = this.__unwrapSTXETX__(list_str);

    return list_str;
};
/**
 * Adding the fenced code block syntax to regular Markdown:
 *
 * ~~~
 * Code block
 * ~~~
 *
 * The fenced content is escaped with _php_htmlspecialchars_ENT_NOQUOTES,
 * wrapped in <pre><code>...</code></pre> and hashed with hashBlock.
 * Blank lines at the very start of the content are turned into <br>
 * elements, one per newline.
 *
 * Returns the text with every fenced code block replaced.
 */
MarkdownExtra_Parser.prototype.doFencedCodeBlocks = function(text) {
    var self = this;

    text = this.__wrapSTXETX__(text);
    text = text.replace(new RegExp(
        '(?:\\n|\\x02)' +
        // 1: Opening marker
        '(' +
            '~{3,}' +                   // Marker: three tilde or more.
        ')' +
        '[ ]*\\n' +                     // Whitespace and newline following marker.

        // 2: Content
        '(' +
            '(?:' +
                '(?=(' +
                    '(?!\\1[ ]*\\n)' +  // Not a closing marker.
                    '.*\\n+' +
                '))\\3' +
            ')+' +
        ')' +

        // Closing marker.
        '\\1[ ]*\\n',
        "mg"
    ), function(match, m1, codeblock) {
        codeblock = self._php_htmlspecialchars_ENT_NOQUOTES(codeblock);
        // NOTE(review): assumes the parser exposes `empty_element_suffix`
        // (e.g. " />") for closing empty elements -- confirm it is defined
        // on the base parser.
        codeblock = codeblock.replace(/^\n+/, function(newlines) {
            return self._php_str_repeat("<br" + self.empty_element_suffix, newlines.length);
        });
        codeblock = "<pre><code>" + codeblock + "</code></pre>";
        return "\n\n" + self.hashBlock(codeblock) + "\n\n";
    });
    text = this.__unwrapSTXETX__(text);

    return text;
};
/**
 * Split text into paragraphs and wrap them.
 *
 * Params:
 * $text - string to process with html tags
 *
 * Each chunk separated by blank lines is run through runSpanGamut and
 * wrapped in <p>...</p>, unless it consists of a block or clean tag hash.
 * The joined result is finally passed through unhash.
 */
MarkdownExtra_Parser.prototype.formParagraphs = function(text) {
    // Strip leading and trailing lines:
    text = this.__wrapSTXETX__(text);
    text = text.replace(/(?:\x02)\n+|\n+(?:\x03)/g, "");
    text = this.__unwrapSTXETX__(text);

    var grafs = text.split(/\n{2,}/);

    //
    // Wrap <p> tags and unhashify HTML blocks
    //
    var wrapped = [];
    for(var i = 0; i < grafs.length; i++) {
        var value = grafs[i];
        if(value == "") {
            // [porting note]
            // This case is replacement for PREG_SPLIT_NO_EMPTY.
            continue;
        }
        value = this._php_trim(this.runSpanGamut(value));

        // Check if this should be enclosed in a paragraph.
        // Clean tag hashes & block tag hashes are left alone.
        var is_p = !value.match(/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/);

        if (is_p) {
            value = "<p>" + value + "</p>";
        }
        wrapped.push(value);
    }

    // Join grafs in one text, then unhash HTML tags.
    text = wrapped.join("\n\n");

    // Finish by removing any tag hashes still present in $text.
    text = this.unhash(text);

    return text;
};
// ### Footnotes
/**
 * Strips footnote definitions from text and stores their content in
 * this.footnotes, keyed by this.fn_id_prefix followed by the note id.
 *
 * Returns the text with the definitions removed.
 */
MarkdownExtra_Parser.prototype.stripFootnotes = function(text) {
    var parser = this;
    var max_indent = this.tab_width - 1;

    // Footnote defs are in the form: [^id]: text
    var footnote_def_re = new RegExp([
        '^[ ]{0,' + max_indent + '}\\[\\^(.+?)\\][ ]?:',   // note_id = $1
        '[ ]*',
        '\\n?',                         // maybe *one* newline
        '(',                            // text = $2 (no blank lines allowed)
            '(?:',
                '.+',                   // actual text
            '|',
                '\\n',                  // newlines but
                '(?!\\[\\^.+?\\]:\\s)', // negative lookahead for footnote marker.
                '(?!\\n+[ ]{0,3}\\S)',  // ensure line is not blank and followed
                                        // by non-indented content
            ')*',
        ')'
    ].join(''), "mg");

    // Records one definition; the matched block is replaced by nothing.
    function store_footnote(whole, id, body) {
        parser.footnotes[parser.fn_id_prefix + id] = parser.outdent(body);
        return '';
    }

    return text.replace(footnote_def_re, store_footnote);
};
/**
 * Replace footnote references in $text [^id] with a special text-token
 * which will be replaced by the actual footnote marker in appendFootnotes.
 *
 * The text is returned untouched while this.in_anchor is truthy.
 */
MarkdownExtra_Parser.prototype.doFootnotes = function(text) {
    if (this.in_anchor) {
        return text;
    }
    return text.replace(/\[\^(.+?)\]/g, "F\x1Afn:$1\x1A:");
};
/**
 * Append footnote list to text.
 *
 * Every footnote token produced by doFootnotes that has a stored,
 * not-yet-used footnote is replaced by a numbered <sup> link; any other
 * token is turned back into its literal `[^id]` form. When at least one
 * footnote was used, a <div class="footnotes"> block holding an ordered
 * list of the footnote texts, each with a backlink, is appended.
 *
 * `%%` inside the link/backlink class and title settings is replaced by
 * the footnote number.
 */
MarkdownExtra_Parser.prototype.appendFootnotes = function(text) {
    var self = this;

    var _appendFootnotes_callback = function(match, m1) {
        var node_id = self.fn_id_prefix + m1;

        // Create footnote marker only if it has a corresponding footnote *and*
        // the footnote hasn't been used by another marker. Own-property check
        // so ids such as "toString" do not hit Object.prototype.
        if (!Object.prototype.hasOwnProperty.call(self.footnotes, node_id)) {
            return "[^" + m1 + "]";
        }

        // Transfer footnote content to the ordered list.
        self.footnotes_ordered.push([node_id, self.footnotes[node_id]]);
        delete self.footnotes[node_id];

        var num = self.footnote_counter++;
        var attr = " rel=\"footnote\"";
        if (self.fn_link_class != "") {
            attr += " class=\"" + self.encodeAttribute(self.fn_link_class) + "\"";
        }
        if (self.fn_link_title != "") {
            attr += " title=\"" + self.encodeAttribute(self.fn_link_title) + "\"";
        }

        attr = attr.replace(/%%/g, String(num));
        node_id = self.encodeAttribute(node_id);

        return "<sup id=\"fnref:" + node_id + "\">" +
            "<a href=\"#fn:" + node_id + "\"" + attr + ">" + num + "</a>" +
            "</sup>";
    };

    text = text.replace(/F\x1Afn:(.*?)\x1A:/g, _appendFootnotes_callback);

    if (this.footnotes_ordered.length > 0) {
        // NOTE(review): assumes the parser exposes `empty_element_suffix`
        // (e.g. " />") for closing empty elements -- confirm it is defined
        // on the base parser.
        text += "\n\n";
        text += "<div class=\"footnotes\">\n";
        text += "<hr" + this.empty_element_suffix + "\n";
        text += "<ol>\n\n";

        var attr = " rev=\"footnote\"";
        if (this.fn_backlink_class != "") {
            attr += " class=\"" + this.encodeAttribute(this.fn_backlink_class) + "\"";
        }
        if (this.fn_backlink_title != "") {
            attr += " title=\"" + this.encodeAttribute(this.fn_backlink_title) + "\"";
        }
        var num = 0;

        // Footnotes referenced from inside other footnotes are pushed onto
        // the list while it is being drained, hence the while loop.
        while (this.footnotes_ordered.length > 0) {
            var head = this.footnotes_ordered.shift();
            var note_id = head[0];
            var footnote = head[1];

            footnote += "\n"; // Need to append newline before parsing.
            footnote = this.runBlockGamut(footnote + "\n");
            footnote = footnote.replace(/F\x1Afn:(.*?)\x1A:/g, _appendFootnotes_callback);

            // Substitute into a per-item copy so the `%%` placeholder is
            // still available for the following footnotes.
            num++;
            var item_attr = attr.replace(/%%/g, String(num));
            note_id = this.encodeAttribute(note_id);

            // Add backlink to last paragraph; create new paragraph if needed.
            var backlink = "<a href=\"#fnref:" + note_id + "\"" + item_attr + ">&#8617;</a>";
            if (footnote.match(/<\/p>$/)) {
                footnote = footnote.substr(0, footnote.length - 4) + "&#160;" + backlink + "</p>";
            } else {
                footnote += "\n\n<p>" + backlink + "</p>";
            }

            text += "<li id=\"fn:" + note_id + "\">\n";
            text += footnote + "\n";
            text += "</li>\n\n";
        }

        text += "</ol>\n";
        text += "</div>";
    }
    return text;
};
//### Abbreviations ###
/**
 * Strips abbreviation definitions from text and records them.
 *
 * Each definition has the form `*[word]: description`. The quoted word is
 * appended to this.abbr_word_re (alternatives separated by `|`) and the
 * trimmed description is stored in this.abbr_desciptions under the word.
 *
 * Returns the text with every definition removed.
 */
MarkdownExtra_Parser.prototype.stripAbbreviations = function(text) {
    var self = this;
    var less_than_tab = this.tab_width - 1;

    // Abbreviation defs are in the form: *[id]: description
    // The `g` flag is required so that every definition is stripped,
    // not just the first one.
    text = text.replace(new RegExp(
        '^[ ]{0,' + less_than_tab + '}\\*\\[(.+?)\\][ ]?:' +   // abbr_id = $1
        '(.*)',                                                 // text = $2 (no blank lines allowed)
        "mg"
    ), function(match, abbr_word, abbr_desc) {
        if (self.abbr_word_re != '') {
            self.abbr_word_re += '|';
        }
        self.abbr_word_re += self._php_preg_quote(abbr_word);
        self.abbr_desciptions[abbr_word] = self._php_trim(abbr_desc);
        return ''; // String that will replace the block
    });
    return text;
};
/**
 * Find defined abbreviations in text and wrap them in <abbr> elements.
 *
 * Only whole words listed in this.abbr_word_re are considered. A word with
 * a non-empty description in this.abbr_desciptions gets the description as
 * its title attribute (encoded with encodeAttribute). Each element is
 * passed through hashPart.
 *
 * Returns the text unchanged when no abbreviation has been defined.
 */
MarkdownExtra_Parser.prototype.doAbbreviations = function(text) {
    var self = this;
    if (!this.abbr_word_re) {
        return text;
    }

    // cannot use the /x modifier because abbr_word_re may
    // contain significant spaces:
    return text.replace(new RegExp(
        '(^|[^\\w\\x1A])' +                 // $1: preceding character (stands in for a lookbehind)
        '(' + this.abbr_word_re + ')' +     // $2: the abbreviation
        '(?![\\w\\x1A])',
        'g'                                 // wrap every occurrence, not only the first
    ), function(match, prev, abbr) {
        // Own-property check so words such as "constructor" do not hit
        // Object.prototype.
        if (!Object.prototype.hasOwnProperty.call(self.abbr_desciptions, abbr)) {
            return match;
        }

        var desc = self.abbr_desciptions[abbr];
        var element;
        if (!desc) {
            element = "<abbr>" + abbr + "</abbr>";
        } else {
            element = "<abbr title=\"" + self.encodeAttribute(desc) + "\">" + abbr + "</abbr>";
        }

        // `prev` was consumed by the pattern, so put it back.
        return prev + self.hashPart(element);
    });
};
/**
* Export to Node.js
*
* Attaches the three public names to `this`.
* NOTE(review): presumably `this` is the module's export object when the
* file is loaded by Node.js -- confirm against the enclosing scope, which
* is not visible from here.
*/
this.Markdown = Markdown;
this.Markdown_Parser = Markdown_Parser;
this.MarkdownExtra_Parser = MarkdownExtra_Parser;