1286 lines
40 KiB
JavaScript
1286 lines
40 KiB
JavaScript
/***
 * Textile parser for JavaScript
 *
 * Copyright (c) 2012 Borgar Þorsteinsson (MIT License).
 *
 */
|
|
/*jshint
  laxcomma:true
  laxbreak:true
  eqnull:true
  loopfunc:true
  sub:true
*/
|
|
;(function(){
|
|
"use strict";
|
|
|
|
/***
 * Regular Expression helper methods
 *
 * This provides the `re` object, which contains several helper
 * methods for working with big regular expressions (soup).
 *
 */
var re = {

  // Compiled expressions, filled in by `compile`. Keys are built from the
  // flags and the source (see `compile`), never from the source alone.
  _cache: {},

  // Named fragments that `expand_patterns` substitutes for `[:name:]`.
  pattern: {
    'punct': "[!-/:-@\\[\\\\\\]-`{-~]",
    'space': '\\s'
  },

  // Backslash-escapes regexp metacharacters (plus `-`, `,`, `#` and
  // whitespace) in `src` so it can be embedded literally in an expression.
  escape: function ( src ) {
    return src.replace( /[\-\[\]\{\}\(\)\*\+\?\.\,\\\^\$\|\#\s]/g, "\\$&" );
  },

  // Strips `# ...` comments (up to end of line) and then all whitespace.
  // Used for expressions compiled with the "x" (verbose) flag.
  collapse: function ( src ) {
    var uncommented = src.replace( /(?:#.*?(?:\n|$))/g, '' );
    return uncommented.replace( /\s+/g, '' );
  },

  // Replaces every `[:name:]` with the (recursively expanded) pattern stored
  // under `re.pattern[name]`; unknown names are replaced by the bare name.
  // TODO: provide escape for patterns: \[:pattern:] ?
  expand_patterns: function ( src ) {
    return src.replace( /\[\:\s*(\w+)\s*\:\]/g, function ( m, k ) {
      if ( k in re.pattern ) {
        return re.expand_patterns( re.pattern[ k ] );
      }
      return k;
    });
  },

  isRegExp: function ( r ) {
    return Object.prototype.toString.call( r ) === "[object RegExp]";
  },

  // Compiles `src` (a string or a RegExp) into a RegExp.
  //
  //  - `[:name:]` references are expanded from `re.pattern`.
  //  - flag "x": comments and whitespace are removed from the source.
  //  - flag "s": unescaped `.` is rewritten to `[^\0]` so it matches newlines.
  //  - only `g`, `i` and `m` are passed on to the RegExp constructor.
  //
  // When `src` is a RegExp and no `flags` argument is given, its own
  // g/i/m flags are reused. Results are cached and the same RegExp object
  // is returned for repeated calls with the same source and flags.
  compile: function ( src, flags ) {
    if ( re.isRegExp( src ) ) {
      if ( arguments.length === 1 ) { // no flags arg provided, use the RegExp one
        flags = ( src.global ? 'g' : '' ) +
                ( src.ignoreCase ? 'i' : '' ) +
                ( src.multiline ? 'm' : '' );
      }
      src = src.source;
    }

    // don't do the same thing twice
    // The flags are length-prefixed so that ("a", "gi") and ("ag", "i") can
    // never share a key, and an own-property check keeps members of
    // Object.prototype from being mistaken for cached entries.
    var wanted = flags || '';
    var ckey = wanted.length + ':' + wanted + src;
    if ( Object.prototype.hasOwnProperty.call( re._cache, ckey ) ) {
      return re._cache[ ckey ];
    }

    // allow classes
    var rx = re.expand_patterns( src );

    // allow verbose expressions
    if ( /x/.test( wanted ) ) {
      rx = re.collapse( rx );
    }

    // allow dotall expressions
    if ( /s/.test( wanted ) ) {
      rx = rx.replace( /([^\\])\./g, '$1[^\\0]' );
    }

    // TODO: test if MSIE and add replace \s with [\s\u00a0] if it is?

    // clean flags and output new regexp
    var compiled = new RegExp( rx, wanted.replace( /[^gim]/g, '' ) );
    re._cache[ ckey ] = compiled;
    return compiled;
  }

};
|
|
|
|
|
|
|
|
|
|
/***
 * JSONML helper methods - http://www.jsonml.org/
 *
 * This provides the `JSONML` object, which contains helper
 * methods for rendering JSONML to HTML.
 *
 * Note that the tag ! is taken to mean comment, this is however
 * not specified in the JSONML spec.
 *
 */
var JSONML = {

  // Escapes `text` for inclusion in HTML. Ampersands that already start an
  // entity (&#123; &#x1f; &name;) are left alone. Quotes are only escaped
  // when `esc_quotes` is truthy (used for attribute values).
  escape: function ( text, esc_quotes ) {
    return text.replace( /&(?!(#\d{2,}|#x[\da-fA-F]{2,}|[a-zA-Z][a-zA-Z1-4]{1,6});)/g, "&amp;" )
               .replace( /</g, "&lt;" )
               .replace( />/g, "&gt;" )
               .replace( /"/g, esc_quotes ? "&quot;" : '"' )
               .replace( /'/g, esc_quotes ? "&#39;" : "'" )
               ;
  },

  // Renders a JSONML node to an HTML string. A node is either a string
  // (text) or an array `[ tag, attributes?, child, ... ]`. The input array
  // is not modified.
  toHTML: function ( jsonml ) {

    // basic case
    if ( typeof jsonml === "string" ) {
      return JSONML.escape( jsonml );
    }

    // work on a copy so that shifting does not alter the caller's array
    var rest = jsonml.concat();
    var tag = rest.shift();
    var attributes = {};
    var content = [];
    var tag_attrs = "";
    var a;

    if ( rest.length && typeof rest[ 0 ] === "object" && !_isArray( rest[ 0 ] ) ) {
      attributes = rest.shift();
    }

    while ( rest.length ) {
      content.push( JSONML.toHTML( rest.shift() ) );
    }

    for ( a in attributes ) {
      if ( attributes[ a ] == null ) {
        // null/undefined value: emit a bare (boolean) attribute
        tag_attrs += " " + a;
      }
      else {
        // coerce first so non-string values (e.g. numbers) can be escaped
        tag_attrs += " " + a + '="' + JSONML.escape( String( attributes[ a ] ), true ) + '"';
      }
    }

    // be careful about adding whitespace here for inline elements
    if ( tag === "!" ) {
      return "<!--" + content.join( "" ) + "-->";
    }
    if ( tag === "img" || tag === "br" || tag === "hr" || tag === "input" ) {
      return "<" + tag + tag_attrs + " />";
    }
    return "<" + tag + tag_attrs + ">" + content.join( "" ) + "</" + tag + ">";
  }

};
|
|
|
|
|
|
// Copies every enumerable property of `source` (inherited ones included)
// onto `a`, overwriting existing keys, and returns `a`.
function merge ( a, b ) {
  var source = b;
  var key;
  for ( key in source ) {
    a[ key ] = source[ key ];
  }
  return a;
}
|
|
|
|
|
|
// Array test: the native Array.isArray when available, otherwise a
// fallback based on the internal [[Class]] string.
var _isArray = Array.isArray || function ( value ) {
  return Object.prototype.toString.call( value ) === '[object Array]';
};
|
|
|
|
/* expressions */

// Named fragments, referenced as [:name:] by the expressions compiled below.
re.pattern.blocks    = '(?:b[qc]|div|notextile|pre|h[1-6]|fn\\d+|p|###)';
re.pattern.pba_class = '\\([^\\)]+\\)';
re.pattern.pba_style = '\\{[^\\}]+\\}';
re.pattern.pba_lang  = '\\[[^\\[\\]]+\\]';
re.pattern.pba_align = '(?:<>|<|>|=)';
re.pattern.pba_pad   = '[\\(\\)]+';
re.pattern.pba_attr  = '(?:[:pba_class:]|[:pba_style:]|[:pba_lang:]|[:pba_align:]|[:pba_pad:])*';
re.pattern.url_punct = '[.,«»″‹›!?]';
re.pattern.html_id   = '[a-zA-Z][a-zA-Z\\d:]*';
re.pattern.html_attr = '(?:"[^"]+"|\'[^\']+\'|[^>\\s]+)';
re.pattern.tx_urlch  = '[\\w"$\\-_.+!*\'(),";\\/?:@=&%#{}|\\\\^~\\[\\]`]';
re.pattern.tx_cite   = ':((?:[^\\s()]|\\([^\\s()]+\\)|[()])+?)(?=[!-\\.:-@\\[\\\\\\]-`{-~]+(?:$|\\s)|$|\\s)';

// Upper-case letters, as the *contents* of a character class (no brackets).
re.pattern.ucaps = [
  "A-Z",
  // Latin extended À-Þ
  "\u00c0-\u00d6\u00d8-\u00de",
  // Latin caps with embelishments and ligatures...
  "\u0100\u0102\u0104\u0106\u0108\u010a\u010c\u010e\u0110\u0112\u0114\u0116\u0118\u011a\u011c\u011e\u0120\u0122\u0124\u0126\u0128\u012a\u012c\u012e\u0130\u0132\u0134\u0136\u0139\u013b\u013d\u013f",
  "\u0141\u0143\u0145\u0147\u014a\u014c\u014e\u0150\u0152\u0154\u0156\u0158\u015a\u015c\u015e\u0160\u0162\u0164\u0166\u0168\u016a\u016c\u016e\u0170\u0172\u0174\u0176\u0178\u0179\u017b\u017d",
  "\u0181\u0182\u0184\u0186\u0187\u0189-\u018b\u018e-\u0191\u0193\u0194\u0196-\u0198\u019c\u019d\u019f\u01a0\u01a2\u01a4\u01a6\u01a7\u01a9\u01ac\u01ae\u01af\u01b1-\u01b3\u01b5\u01b7\u01b8\u01bc",
  "\u01c4\u01c7\u01ca\u01cd\u01cf\u01d1\u01d3\u01d5\u01d7\u01d9\u01db\u01de\u01e0\u01e2\u01e4\u01e6\u01e8\u01ea\u01ec\u01ee\u01f1\u01f4\u01f6-\u01f8\u01fa\u01fc\u01fe",
  "\u0200\u0202\u0204\u0206\u0208\u020a\u020c\u020e\u0210\u0212\u0214\u0216\u0218\u021a\u021c\u021e\u0220\u0222\u0224\u0226\u0228\u022a\u022c\u022e\u0230\u0232\u023a\u023b\u023d\u023e",
  "\u0241\u0243-\u0246\u0248\u024a\u024c\u024e",
  "\u1e00\u1e02\u1e04\u1e06\u1e08\u1e0a\u1e0c\u1e0e\u1e10\u1e12\u1e14\u1e16\u1e18\u1e1a\u1e1c\u1e1e\u1e20\u1e22\u1e24\u1e26\u1e28\u1e2a\u1e2c\u1e2e\u1e30\u1e32\u1e34\u1e36\u1e38\u1e3a\u1e3c\u1e3e\u1e40",
  "\u1e42\u1e44\u1e46\u1e48\u1e4a\u1e4c\u1e4e\u1e50\u1e52\u1e54\u1e56\u1e58\u1e5a\u1e5c\u1e5e\u1e60\u1e62\u1e64\u1e66\u1e68\u1e6a\u1e6c\u1e6e\u1e70\u1e72\u1e74\u1e76\u1e78\u1e7a\u1e7c\u1e7e",
  "\u1e80\u1e82\u1e84\u1e86\u1e88\u1e8a\u1e8c\u1e8e\u1e90\u1e92\u1e94\u1e9e\u1ea0\u1ea2\u1ea4\u1ea6\u1ea8\u1eaa\u1eac\u1eae\u1eb0\u1eb2\u1eb4\u1eb6\u1eb8\u1eba\u1ebc\u1ebe",
  "\u1ec0\u1ec2\u1ec4\u1ec6\u1ec8\u1eca\u1ecc\u1ece\u1ed0\u1ed2\u1ed4\u1ed6\u1ed8\u1eda\u1edc\u1ede\u1ee0\u1ee2\u1ee4\u1ee6\u1ee8\u1eea\u1eec\u1eee\u1ef0\u1ef2\u1ef4\u1ef6\u1ef8\u1efa\u1efc\u1efe",
  "\u2c60\u2c62-\u2c64\u2c67\u2c69\u2c6b\u2c6d-\u2c70\u2c72\u2c75\u2c7e\u2c7f",
  "\ua722\ua724\ua726\ua728\ua72a\ua72c\ua72e\ua732\ua734\ua736\ua738\ua73a\ua73c\ua73e",
  "\ua740\ua742\ua744\ua746\ua748\ua74a\ua74c\ua74e\ua750\ua752\ua754\ua756\ua758\ua75a\ua75c\ua75e\ua760\ua762\ua764\ua766\ua768\ua76a\ua76c\ua76e\ua779\ua77b\ua77d\ua77e",
  "\ua780\ua782\ua784\ua786\ua78b\ua78d\ua790\ua792\ua7a0\ua7a2\ua7a4\ua7a6\ua7a8\ua7aa"
].join( '' );
|
|
|
|
// Block level. Expressions passed through re.compile may use [:name:]
// references and the extra "s" (dotall) flag.
var re_block = re.compile( /^([:blocks:])/ );
var re_block_se = re.compile( /^[:blocks:]$/ );
var re_block_normal = re.compile( /^(.*?)($|\n(?:\s*\n|$)+)/, 's' );
var re_block_extended = re.compile( /^(.*?)($|\n+(?=[:blocks:][:pba_attr:]\.))/, 's' );
var re_ruler = /^(\-\-\-+|\*\*\*+|___+)(\n\s+|$)/;
var re_list = re.compile( /^((?:[\t ]*[\#\*]+[:pba_attr:] .+?(?:\n|$))+)(\s*\n)?/ );
var re_list_item = /^([\#\*]+)(.+?)(\n|$)/;
var re_table = re.compile( /^((?:table[:pba_attr:]\.\n)?(?:(?:[:pba_attr:]\.[^\n\S]*)?\|.*?\|[^\n\S]*(?:\n|$))+)([^\n\S]*\n)?/, 's' );
var re_table_head = /^table(_?)([^\n]+)\.\s?\n/;
var re_table_row = re.compile( /^([:pba_attr:]\.[^\n\S]*)?\|(.*?)\|[^\n\S]*(\n|$)/, 's' );

// Inline level
var re_fenced_phrase = /^\[(__?|\*\*?|\?\?|[\-\+\^~@%])([^\n]+)\1\]/;
var re_phrase = /^([\[\{]?)(__?|\*\*?|\?\?|[\-\+\^~@%])/;
var re_text = re.compile( /^.+?(?=[\\<!\[_\*`]|\n|$)/, 's' );
var re_image = re.compile( /^!(?!\s)([:pba_attr:](?:\.[^\n\S]|\.(?:[^\.\/]))?)([^!\s]+?) ?(?:\(((?:[^\(\)]+|\([^\(\)]+\))+)\))?!(?::([^\s]+?(?=[!-\.:-@\[\\\]-`{-~](?:$|\s)|\s|$)))?/ );
var re_image_fenced = re.compile( /^\[!(?!\s)([:pba_attr:](?:\.[^\n\S]|\.(?:[^\.\/]))?)([^!\s]+?) ?(?:\(((?:[^\(\)]+|\([^\(\)]+\))+)\))?!(?::([^\s]+?(?=[!-\.:-@\[\\\]-`{-~](?:$|\s)|\s|$)))?\]/ );
// NB: there is an exception in here to prevent matching "TM)"
var re_caps = re.compile( /^((?!TM\)|tm\))[[:ucaps:]](?:[[:ucaps:]\d]{1,}(?=\()|[[:ucaps:]\d]{2,}))(?:\((.*?)\))?(?=\W|$)/ );
var re_link = re.compile( /^"(?!\s)((?:[^\n"]|"(?![\s:])[^\n"]+"(?!:))+)"[:tx_cite:]/ );
var re_link_fenced = /^\["([^\n]+?)":((?:\[[a-z0-9]*\]|[^\]])+)\]/;
var re_link_ref = re.compile( /^\[([^\]]+)\]((?:https?:\/\/|\/)\S+)(?:\s*\n|$)/ );
var re_link_title = /\s*\(((?:\([^\(\)]*\)|[^\(\)]+)+)\)$/;
var re_footnote_def = /^fn\d+$/;
var re_footnote = /^\[(\d+)\]/;

// HTML
var re_html_tag_block = re.compile( /^\s*<([:html_id:](?::[a-zA-Z\d]+)*)((?:\s[^=\s\/]+(?:\s*=\s*[:html_attr:])?)+)?\s*(\/?)>(\n*)/ );
var re_html_tag = re.compile( /^<([:html_id:])((?:\s[^=\s\/]+(?:\s*=\s*[:html_attr:])?)+)?\s*(\/?)>(\n*)/ );
var re_html_comment = re.compile( /^<!--(.+?)-->/, 's' );
var re_html_end_tag = re.compile( /^<\/([:html_id:])([^>]*)>/ );
var re_html_attr = re.compile( /^\s*([^=\s]+)(?:\s*=\s*("[^"]+"|'[^']+'|[^>\s]+))?/ );
var re_entity = /&(#\d\d{2,}|#x[\da-fA-F]{2,}|[a-zA-Z][a-zA-Z1-4]{1,6});/;

// glyphs
var re_dimsign = /([\d\.,]+['"]? ?)x( ?)(?=[\d\.,]['"]?)/g;
var re_emdash = /(^|[\s\w])--([\s\w]|$)/g;
var re_trademark = /(\b ?|\s|^)(?:\((?:TM|tm)\)|\[(?:TM|tm)\])/g;
var re_registered = /(\b ?|\s|^)(?:\(R\)|\[R\])/gi;
var re_copyright = /(\b ?|\s|^)(?:\(C\)|\[C\])/gi;
var re_apostrophe = /(\w)\'(\w)/g;
var re_double_prime = re.compile( /(\d*[\.,]?\d+)"(?=\s|$|[:punct:])/g );
var re_single_prime = re.compile( /(\d*[\.,]?\d+)'(?=\s|$|[:punct:])/g );
var re_closing_dquote = re.compile( /([^\s\[\(])"(?=$|\s|[:punct:])/g );
var re_closing_squote = re.compile( /([^\s\[\(])'(?=$|\s|[:punct:])/g );

// pba (the attribute prefixes consumed by parse_attr)
var re_pba_classid = /^\(([^\(\)\n]+)\)/;
var re_pba_padding_l = /^([\(]+)/;
var re_pba_padding_r = /^([\)]+)/;
var re_pba_align_blk = /^(<>|<|>|=)/;
var re_pba_align_img = /^(<|>|=)/;
var re_pba_valign = /^(~|\^|\-)/;
var re_pba_colspan = /^\\(\d+)/;
var re_pba_rowspan = /^\/(\d+)/;
var re_pba_styles = /^\{([^\}]*)\}/;
var re_pba_css = /^\s*([^:\s]+)\s*:\s*(.+)\s*$/;
var re_pba_lang = /^\[([^\[\]]+)\]/;
|
|
|
|
// Maps a Textile phrase token to the HTML element it produces.
var phrase_convert = {
  '*':  'strong',
  '**': 'b',
  '??': 'cite',
  '_':  'em',
  '__': 'i',
  '-':  'del',
  '%':  'span',
  '+':  'ins',
  '~':  'sub',
  '^':  'sup',
  '@':  'code'
};
|
|
|
|
// Tags treated as "single" (no content, no end tag) by the HTML parsers.
// For reference, the elements usually listed as void are:
// area, base, basefont, bgsound, br, col, command, embed, frame, hr,
// img, input, keygen, link, meta, param, source, track or wbr
var html_singletons = {
  br: 1,
  hr: 1,
  img: 1,
  link: 1,
  meta: 1,
  wbr: 1,
  area: 1,
  param: 1,
  input: 1,
  option: 1,
  base: 1
};
|
|
|
|
// Alignment token -> alignment keyword.
var pba_align_lookup = {
  '<':  'left',
  '=':  'center',
  '>':  'right',
  '<>': 'justify'
};
|
|
|
|
// Vertical alignment token -> `vertical-align` value.
var pba_valign_lookup = {
  '~': 'bottom',
  '^': 'top',
  '-': 'middle'
};
|
|
|
|
// HTML tags allowed in the document (root) level that trigger HTML parsing.
// Membership is tested with `in`; the 0/1 values are kept exactly as they
// were.
var allowed_blocktags = {
  p: 0,
  hr: 0,
  ul: 1,
  ol: 0,
  li: 0,
  div: 1,
  pre: 0,
  object: 1,
  script: 0,
  noscript: 0,
  blockquote: 1,
  notextile: 1
};
|
|
|
|
|
|
// A cursor over a string. The object stringifies (toString / valueOf) to the
// text that has not been consumed yet, so it can be handed directly to
// RegExp#exec.
//
//   save()            remember the current position
//   load()            return to the last saved position (start if none)
//   advance( n )      consume n characters, or the length of string n;
//                     returns the remaining text
//   lookbehind( n )   up to n (default 1) already consumed characters
//                     immediately before the current position
//   startsWith( s )   does the remaining text begin with s?
function ribbon ( feed ) {
  var org = feed + '';
  var pos = 0;
  var _slot = 0;

  // keep the remaining text a string even when a non-string was passed in
  feed = org;

  return {

    save: function () {
      _slot = pos;
    },

    load: function () {
      pos = _slot;
      feed = org.slice( pos );
    },

    advance: function ( n ) {
      pos += ( typeof n === 'string' ) ? n.length : n;
      return ( feed = org.slice( pos ) );
    },

    lookbehind: function ( nchars ) {
      nchars = nchars == null ? 1 : nchars;
      // clamp: a negative start would make slice() count from the end
      return org.slice( Math.max( 0, pos - nchars ), pos );
    },

    startsWith: function ( s ) {
      return feed.substring( 0, s.length ) === s;
    },

    valueOf: function () {
      return feed;
    },

    toString: function () {
      return feed;
    }

  };
}
|
|
|
|
|
|
// Wraps an array of JSONML nodes (the given array, or a new one when `arr`
// is not an array) with helpers for appending to it.
//
//   add( node )    append a node; returns the builder
//   merge( list )  add() every item of `list`; returns the builder
//   linebreak()    add "\n" unless the list is still empty
//   get()          the underlying array
function builder ( arr ) {
  var nodes = _isArray( arr ) ? arr : [];

  return {

    add: function ( node ) {
      var last = nodes.length - 1;
      var is_text = typeof node === 'string';

      if ( is_text && typeof nodes[ last ] === 'string' ) {
        // adjacent text is joined into a single string
        nodes[ last ] += node;
      }
      else if ( _isArray( node ) ) {
        // elements are stored as copies with undefined members removed
        nodes.push( node.filter( function ( member ) {
          return member !== undefined;
        } ) );
      }
      else if ( node ) {
        nodes.push( node );
      }
      return this;
    },

    merge: function ( s ) {
      var count = s.length;
      for ( var idx = 0; idx < count; idx++ ) {
        this.add( s[ idx ] );
      }
      return this;
    },

    linebreak: function () {
      if ( nodes.length ) {
        this.add( '\n' );
      }
    },

    get: function () {
      return nodes;
    }

  };
}
|
|
|
|
|
|
// Returns a shallow copy of the attribute object `s` without the keys that
// are present in `blacklist`. Returns undefined when `s` is falsy.
function copy_pba ( s, blacklist ) {
  if ( !s ) { return undefined; }
  var copy = {};
  for ( var key in s ) {
    if ( blacklist && key in blacklist ) { continue; }
    copy[ key ] = s[ key ];
  }
  return copy;
}
|
|
|
|
|
|
// Parses the attribute portion of an HTML tag into an object.
// Values have one pair of matching surrounding quotes removed; attributes
// without a value are stored as null.
function parse_html_attr ( attr ) {
  var parsed = {};
  var rest = attr;
  var hit;

  while ( (hit = re_html_attr.exec( rest )) ) {
    var name = hit[ 1 ];
    var raw = hit[ 2 ];
    if ( typeof raw === 'string' ) {
      parsed[ name ] = raw.replace( /^(["'])(.*)\1$/, '$2' );
    }
    else {
      parsed[ name ] = null;
    }
    // continue after what was just consumed
    rest = rest.slice( hit[ 0 ].length );
  }

  return parsed;
}
|
|
|
|
|
|
// Parses HTML text "indiscriminately" into a list of JSONML nodes.
// Nothing is done to prevent things like <table><p><td>: nonsensical but
// "well-formed" markup is accepted as-is.
//
// `src` may be a string or a ribbon. When `whitelist_tags` is given, only
// tags that are keys of it are parsed as tags ("!" stands for comments);
// anything else ends up as text.
function parse_html ( src, whitelist_tags ) {
  var root = [];
  var list = root;   // the array currently receiving nodes
  var open = [];     // elements whose end tag has not been seen yet
  var m;
  var tag;
  var oktag = function ( name ) {
    return whitelist_tags ? name in whitelist_tags : true;
  };

  if ( typeof src === 'string' ) {
    src = ribbon( src );
  }

  do {

    // comment
    if ( (m = re_html_comment.exec( src )) && oktag( '!' ) ) {
      src.advance( m[0] );
      list.push( [ '!', m[1] ] );
    }

    // end tag
    else if ( (m = re_html_end_tag.exec( src )) && oktag( m[1] ) ) {
      tag = m[1];
      // close the innermost open element of that name, together with
      // everything opened inside it; unmatched end tags are dropped
      for ( var depth = open.length - 1; depth >= 0; depth-- ) {
        if ( open[ depth ][0] === tag ) {
          open.splice( depth );
          list = open[ open.length - 1 ] || root;
          break;
        }
      }
      src.advance( m[0] );
    }

    // open/void tag
    else if ( (m = re_html_tag.exec( src )) && oktag( m[1] ) ) {
      src.advance( m[0] );
      tag = m[1];

      var single = m[3] || m[1] in html_singletons;
      var tail = m[4];
      var element = [ tag ];

      // attributes
      if ( m[2] ) {
        element.push( parse_html_attr( m[2] ) );
      }

      if ( single ) {
        // single tag: trailing newlines become a sibling text node
        list.push( element );
        if ( tail ) { list.push( tail ); }
      }
      else {
        // open tag: trailing newlines become its first child
        if ( tail ) { element.push( tail ); }

        // TODO: some things auto close other things: <td>, <li>, <p>, <table>
        // TODO: some elements can move parser into "text" mode
        // style, xmp, iframe, noembed, noframe, textarea, title, script, noscript, plaintext

        open.push( element );
        list.push( element );
        list = element;
      }
    }

    else {
      // no match, move by all "uninteresting" chars
      m = /([^<]+|[^\0])/.exec( src );
      if ( m ) {
        list.push( m[0] );
      }
      src.advance( m ? m[0].length || 1 : 1 );
    }

  }
  while ( src.valueOf() );

  return root;
}
|
|
|
|
/* attribute parser */
|
|
|
|
// Parses the Textile attribute prefix at the start of `input` for an element
// of type `element`.
//
// Returns undefined when nothing was consumed (or when input is empty or the
// element is "notextile"); otherwise `[ consumed_length, attributes ]`.
//
// Parens are ambiguous on spans:
//
//   RC:  `_{display:block}(span) span (span)_` -> `<em style="display:block;" class="span">(span) span (span)</em>`
//   PHP: `_{display:block}(span) span (span)_` -> `<em style="display:block;">(span) span (span)</em>`
//
// so a class/id group is not accepted when `end_token` is given and the
// group is followed by a space or by `end_token`, nor on a phrase element
// when nothing follows the group.
function parse_attr ( input, element, end_token ) {
  input += '';
  if ( !input || element === 'notextile' ) { return undefined; }

  var styles = {};
  var attrs = { 'style': styles };
  var remaining = input;
  // "in" test would be better but what about fn#.?
  var is_block = element === 'table' || element === 'td' || re_block_se.test( element );
  var is_img = element === 'img';
  var is_phrase = !is_block && !is_img && element !== 'a';
  var re_pba_align = is_img ? re_pba_align_img : re_pba_align_blk;
  var m;

  // Every `continue` re-tests `m`: a match keeps the loop going, while a
  // nulled or failed match ends it.
  do {

    // {css: declarations}
    if ( (m = re_pba_styles.exec( remaining )) ) {
      var decls = m[1].split( ';' );
      for ( var i = 0; i < decls.length; i++ ) {
        var decl = decls[ i ].match( re_pba_css );
        if ( decl ) { styles[ decl[1] ] = decl[2]; }
      }
      remaining = remaining.slice( m[0].length );
      continue;
    }

    // [lang]
    if ( (m = re_pba_lang.exec( remaining )) ) {
      attrs[ 'lang' ] = m[1];
      remaining = remaining.slice( m[0].length );
      continue;
    }

    // (class#id)
    if ( (m = re_pba_classid.exec( remaining )) ) {
      var after = remaining.slice( m[0].length );
      var ends_phrase = !after && is_phrase;
      var is_content = end_token &&
            ( after.charAt( 0 ) === ' ' || end_token === after.slice( 0, end_token.length ) );
      if ( ends_phrase || is_content ) {
        // the parens belong to the text: stop parsing attributes
        m = null;
        continue;
      }
      var bits = m[1].split( '#' );
      if ( bits[0] ) { attrs[ 'class' ] = bits[0]; }
      if ( bits[1] ) { attrs[ 'id' ] = bits[1]; }
      remaining = after;
      continue;
    }

    // padding, blocks only
    if ( is_block ) {
      if ( (m = re_pba_padding_l.exec( remaining )) ) {
        styles[ "padding-left" ] = ( m[1].length ) + "em";
        remaining = remaining.slice( m[0].length );
        continue;
      }
      if ( (m = re_pba_padding_r.exec( remaining )) ) {
        styles[ "padding-right" ] = ( m[1].length ) + "em";
        remaining = remaining.slice( m[0].length );
        continue;
      }
    }

    // alignment, images and blocks only
    if ( is_img || is_block ) {
      if ( (m = re_pba_align.exec( remaining )) ) {
        var align = pba_align_lookup[ m[1] ];
        if ( is_img ) {
          attrs[ 'align' ] = align;
        }
        else {
          styles[ 'text-align' ] = align;
        }
        remaining = remaining.slice( m[0].length );
        continue;
      }
    }

    // vertical alignment, table cells and rows only
    if ( element === 'td' || element === 'tr' ) {
      if ( (m = re_pba_valign.exec( remaining )) ) {
        styles[ "vertical-align" ] = pba_valign_lookup[ m[1] ];
        remaining = remaining.slice( m[0].length );
        continue;
      }
    }

    // spans, table cells only
    if ( element === 'td' ) {
      if ( (m = re_pba_colspan.exec( remaining )) ) {
        attrs[ "colspan" ] = m[1];
        remaining = remaining.slice( m[0].length );
        continue;
      }
      if ( (m = re_pba_rowspan.exec( remaining )) ) {
        attrs[ "rowspan" ] = m[1];
        remaining = remaining.slice( m[0].length );
        continue;
      }
    }

  }
  while ( m );

  // collapse styles into a single `style` string, or drop the key
  var css = [];
  for ( var prop in styles ) {
    css.push( prop + ':' + styles[ prop ] );
  }
  if ( css.length ) {
    attrs.style = css.join( ';' );
  }
  else {
    delete attrs.style;
  }

  if ( remaining === input ) { return undefined; }
  return [ input.length - remaining.length, attrs ];
}
|
|
|
|
|
|
|
|
/* glyph parser */
|
|
|
|
// Replaces plain-text glyph sequences (arrows, dimension signs, ellipses,
// dashes, legal marks, primes and quotes) with their typographic
// characters. Values that are not strings are returned untouched, which
// lets this be mapped over a list of JSONML nodes.
function parse_glyphs ( src ) {
  if ( typeof src !== 'string' ) { return src; }
  // NB: order is important here ...
  return src
    // arrow -- global, like every other rule, so that each "->" is converted
    // and not just the first one
    .replace( /([^\-]|^)->/g, '$1→' )
    // dimension sign
    .replace( re_dimsign, '$1×$2' )
    // ellipsis
    .replace( /([^.]?)\.{3}/g, '$1…' )
    // dashes
    .replace( re_emdash, '$1—$2' )       // em dash
    .replace( /( )-( )/g, '$1–$2' )      // en dash
    // legal marks
    .replace( re_trademark, '$1™' )      // trademark
    .replace( re_registered, '$1®' )     // registered
    .replace( re_copyright, '$1©' )      // copyright
    // double quotes: primes and closing quotes first, what is left opens
    .replace( re_double_prime, '$1″' )   // double prime
    .replace( re_closing_dquote, '$1”' ) // double closing quote
    .replace( /"/g, '“' )                // double opening quote
    // single quotes, same scheme
    .replace( re_single_prime, '$1′' )   // single prime
    .replace( re_apostrophe, '$1’$2' )   // I'm an apostrophe
    .replace( re_closing_squote, '$1’' ) // single closing quote
    .replace( /'/g, '‘' )                // single opening quote
    ;
}
|
|
|
|
|
|
/* list parser */
|
|
|
|
// Parses a block of Textile list items ("* item", "## item", ...) into a
// JSONML list. The number of markers gives the nesting depth and the last
// marker of an item decides the type ("#" -> ol, otherwise ul) of any list
// that has to be opened for it. Whitespace text nodes are inserted so the
// rendered HTML is indented.
//
// Returns the outermost list element, or undefined when `src` contains no
// list item.
function parse_list ( src, options ) {

  // Remove the indentation in front of *every* item line: re_list_item only
  // matches at a marker, so an item that kept its indent would end the
  // parse and be lost together with everything after it.
  src = ribbon( src.replace( /(^|\n)[\t ]+/g, '$1' ) );

  // newline followed by n tabs
  var pad = function ( n ) {
    var s = '\n';
    while ( n-- ) { s += '\t'; }
    return s;
  };
  var stack = [];   // one { ul: list, li: current item } per open level
  var m;
  var closed;

  while ( (m = re_list_item.exec( src )) ) {

    var item = [ 'li' ];
    var text = m[2];
    var pba = parse_attr( text, 'li' );
    if ( pba ) {
      text = text.slice( pba[0] );
      pba = pba[1];
    }

    var markers = m[1];
    var dest_level = markers.length;
    var type = markers.charAt( dest_level - 1 ) === '#' ? 'ol' : 'ul';
    var new_li = null;
    var lst;
    var par;

    // create nesting until we have correct level
    while ( stack.length < dest_level ) {
      new_li = [ 'li' ];
      lst = [ type, pad( stack.length + 1 ), new_li ];
      par = stack[ stack.length - 1 ];
      if ( par ) {
        // a nested list lives inside the current item of its parent
        par.li.push( pad( stack.length ) );
        par.li.push( lst );
      }
      stack.push( { ul: lst, li: new_li } );
    }

    // remove nesting until we have correct level
    while ( stack.length > dest_level ) {
      closed = stack.pop();
      closed.ul.push( pad( stack.length ) );
    }

    par = stack[ stack.length - 1 ];
    if ( !new_li ) {
      // no list was opened for this item: append it to the current one
      par.ul.push( pad( stack.length ), item );
      par.li = item;
    }
    if ( pba ) { par.li.push( pba ); }
    Array.prototype.push.apply( par.li, parse_inline( text.trim(), options ) );

    src.advance( m[0] );
  }

  // close whatever is still open; the last one closed is the outermost list
  closed = undefined;
  while ( stack.length ) {
    closed = stack.pop();
    closed.ul.push( pad( stack.length ) );
  }

  return closed ? closed.ul : undefined;
}
|
|
|
|
|
|
|
|
/* table parser */
|
|
|
|
// Parses a Textile table block into a JSONML `table` element. An optional
// "table<attr>." head line provides the table attributes, and every
// following "|cell|cell|" line becomes a `tr`. Whitespace text nodes are
// inserted so the rendered HTML is indented.
function parse_table ( src, options ) {
  src = ribbon( src.trim() );

  var table = [ 'table' ];
  var m;
  var pba;

  // optional head line: parse and apply table attr
  if ( (m = re_table_head.exec( src )) ) {
    src.advance( m[0] );
    pba = parse_attr( m[2], 'table' );
    if ( pba ) {
      table.push( pba[1] );
    }
  }

  while ( (m = re_table_row.exec( src )) ) {
    var row = [ 'tr' ];

    // row attributes
    // FIXME: requires "\.\s?" -- else what ?
    if ( m[1] && (pba = parse_attr( m[1], 'tr' )) ) {
      row.push( pba[1] );
    }

    table.push( '\n\t', row );

    var inner = ribbon( m[2] );
    var more;

    // cell loop
    do {
      // remember the start of the cell in case its prefix is plain content
      inner.save();

      var th = inner.startsWith( '_' );
      var cell = [ th ? 'th' : 'td' ];
      if ( th ) {
        inner.advance( 1 );
      }

      pba = parse_attr( inner, 'td' );
      if ( pba ) {
        inner.advance( pba[0] );
        cell.push( pba[1] ); // FIXME: don't do this if next text fails
      }

      if ( pba || th ) {
        // a header marker or attributes only count when followed by "."
        var dot = /^\.\s*/.exec( inner );
        if ( dot ) {
          inner.advance( dot[0] );
        }
        else {
          cell = [ 'td' ];
          inner.load();
        }
      }

      // cell content: up to the next "|" that is not inside ==notextile==
      var content = /^(==.*?==|[^\|])*/.exec( inner )[0];
      cell = cell.concat( parse_inline( content, options ) );
      row.push( '\n\t\t', cell );

      more = inner.valueOf().charAt( content.length ) === '|';
      inner.advance( content.length + 1 );
    }
    while ( more );

    row.push( '\n\t' );

    src.advance( m[0] );
  }

  table.push( '\n' );
  return table;
}
|
|
|
|
|
|
/* inline parser */
|
|
|
|
// Parses inline Textile in `src` into a list of JSONML nodes.
//
// At every position the constructs are tried in this order: linebreak,
// ==notextile==, phrase (*, _, @, ...), image, HTML comment, HTML tag,
// footnote reference, caps/acronym, link. When none applies a run of plain
// text is copied. Every text node of the result is passed through
// parse_glyphs.
//
// `options.breaks` makes a newline emit a `br` element before the "\n".
function parse_inline ( src, options ) {

  src = ribbon( src );

  var out = builder();
  var m;
  var pba;

  do {
    // remember where this round started so a failed attempt can rewind
    src.save();

    // linebreak -- having this first keeps it from messing to much with other phrases
    if ( src.startsWith( '\n' ) ) {
      src.advance( 1 );
      if ( options.breaks ) {
        out.add( [ 'br' ] );
      }
      out.add( '\n' );
      continue;
    }

    // inline notextile
    if ( (m = /^==(.*?)==/.exec( src )) ) {
      src.advance( m[0] );
      out.add( m[1] );
      continue;
    }

    // lookbehind => /([\s>.,"'?!;:])$/
    var behind = src.lookbehind( 1 );
    var boundary = !behind || /^[\s>.,"'?!;:()]$/.test( behind );

    // phrase
    // FIXME: need to test right boundary for phrases as well
    if ( (m = re_phrase.exec( src )) && ( boundary || m[1] ) ) {
      src.advance( m[0] );

      var tok = m[2];
      var fence = m[1];
      var phrase_type = phrase_convert[ tok ];
      var code = phrase_type === 'code';

      // attributes are not parsed for code phrases
      if ( (pba = !code && parse_attr( src, phrase_type, tok )) ) {
        src.advance( pba[0] );
        pba = pba[1];
      }

      // FIXME: if we can't match the fence on the end, we should output fence-prefix as normal text
      // seek end
      var m_mid;
      var m_end;
      if ( fence === '[' ) {
        m_mid = '^(.*?)';
        m_end = '(?:])';
      }
      else if ( fence === '{' ) {
        m_mid = '^(.*?)';
        m_end = '(?:})';
      }
      else {
        var t1 = re.escape( tok.charAt( 0 ) );
        if ( code ) {
          m_mid = '^(\\S+|\\S+.*?\\S)';
        }
        else {
          m_mid = '^([^\\s' + t1 + ']+|[^\\s' + t1 + '].*?\\S(' + t1 + '*))';
        }
        m_end = '(?=$|[\\s.,"\'!?;:()«»„“”‚‘’])';
      }
      var rx = re.compile( m_mid + '(' + re.escape( tok ) + ')' + m_end );

      if ( (m = rx.exec( src )) && m[1] ) {
        src.advance( m[0] );
        if ( code ) {
          // code content is kept verbatim
          out.add( [ phrase_type, m[1] ] );
        }
        else {
          out.add( [ phrase_type, pba ].concat( parse_inline( m[1], options ) ) );
        }
        continue;
      }

      // no closing token: rewind and try the other constructs
      src.load();
    }

    // image
    if ( (m = re_image.exec( src )) || (m = re_image_fenced.exec( src )) ) {
      src.advance( m[0] );

      pba = m[1] && parse_attr( m[1], 'img' );
      var attr = pba ? pba[1] : { 'src': '' };
      var img = [ 'img', attr ];

      attr.src = m[2];
      if ( m[3] ) {
        // the parenthesised text is used as both title and alt
        attr.title = m[3];
        attr.alt = m[3];
      }
      else {
        attr.alt = '';
      }

      if ( m[4] ) { // +cite causes image to be wraped with a link (or link_ref)?
        // TODO: support link_ref for image cite
        img = [ 'a', { 'href': m[4] }, img ];
      }
      out.add( img );
      continue;
    }

    // html comment
    if ( (m = re_html_comment.exec( src )) ) {
      src.advance( m[0] );
      out.add( [ '!', m[1] ] );
      continue;
    }

    // html tag
    // TODO: this seems to have a lot of overlap with block tags... DRY?
    if ( (m = re_html_tag.exec( src )) ) {
      src.advance( m[0] );

      var tag = m[1];
      var single = m[3] || m[1] in html_singletons;
      var element = [ tag ];
      var tail = m[4];

      if ( m[2] ) {
        element.push( parse_html_attr( m[2] ) );
      }

      if ( single ) { // single tag
        out.add( element ).add( tail );
        continue;
      }

      // need terminator: gulp up the rest of this block...
      var re_end_tag = re.compile( "^(.*?)(</" + tag + "\\s*>)", 's' );
      if ( (m = re_end_tag.exec( src )) ) {
        src.advance( m[0] );
        if ( tag === 'code' ) {
          // contents of <code> are not parsed
          element.push( tail, m[1] );
        }
        else if ( tag === 'notextile' ) {
          // the wrapper itself is dropped, its parsed contents are kept
          out.merge( parse_inline( m[1], options ) );
          continue;
        }
        else {
          element = element.concat( parse_inline( m[1], options ) );
        }
        out.add( element );
        continue;
      }

      // end tag is missing, treat tag as normal text...
      src.load();
    }

    // footnote
    if ( (m = re_footnote.exec( src )) ) {
      src.advance( m[0] );
      out.add( [ 'sup', { 'class': 'footnote', 'id': 'fnr' + m[1] },
                 [ 'a', { href: '#fn' + m[1] }, m[1] ]
               ] );
      continue;
    }

    // caps / abbr
    if ( (m = re_caps.exec( src )) ) {
      src.advance( m[0] );
      var caps = [ 'span', { 'class': 'caps' }, m[1] ];
      if ( m[2] ) {
        caps = [ 'acronym', { 'title': m[2] }, caps ]; // FIXME: use <abbr>, not acronym!
      }
      out.add( caps );
      continue;
    }

    // links
    if ( (boundary && (m = re_link.exec( src ))) || (m = re_link_fenced.exec( src )) ) {
      src.advance( m[0].length );

      var title = m[1].match( re_link_title );
      var inner = title ? m[1].slice( 0, m[1].length - title[0].length ) : m[1];

      if ( (pba = parse_attr( inner, 'a' )) ) {
        inner = inner.slice( pba[0] );
        pba = pba[1];
      }
      else {
        pba = {};
      }

      // a link text made up of nothing but "(...)" is text, not a title
      if ( title && !inner ) {
        inner = title[0];
        title = "";
      }

      pba.href = m[2];
      if ( title ) {
        pba.title = title[1];
      }
      out.add( [ 'a', pba ].concat( parse_inline( inner.replace( /^(\.?\s*)/, '' ), options ) ) );
      continue;
    }

    // no match, move by all "uninteresting" chars
    m = /([a-zA-Z0-9,.':]+|\s+|[^\0])/.exec( src );
    if ( m ) {
      out.add( m[0] );
    }
    src.advance( m ? m[0].length || 1 : 1 );

  }
  while ( src.valueOf() );

  return out.get().map( parse_glyphs );
}
|
|
|
|
|
|
/* block parser */
|
|
|
|
function parse_blocks ( src, options ) {
|
|
|
|
var list = builder()
|
|
, paragraph = function ( s, tag, pba, linebreak ) {
|
|
tag = tag || 'p';
|
|
var out = [];
|
|
s.split( /\n\n+/ ).forEach(function( bit, i ) {
|
|
if ( tag === 'p' && /^\s/.test( bit ) ) {
|
|
// no-paragraphs
|
|
// WTF?: Why does Textile not allow linebreaks in spaced lines
|
|
bit = bit.replace( /\n[\t ]/g, ' ' ).trim();
|
|
out = out.concat( parse_inline( bit, options ) );
|
|
}
|
|
else {
|
|
if ( linebreak && i ) { out.push( linebreak ); }
|
|
out.push( pba ? [ tag, pba ].concat( parse_inline( bit, options ) )
|
|
: [ tag ].concat( parse_inline( bit, options ) ) );
|
|
}
|
|
});
|
|
return out;
|
|
}
|
|
, link_refs = {}
|
|
, m
|
|
;
|
|
src = ribbon( src.replace( /^( *\n)+/, '' ) );
|
|
|
|
// loop
|
|
while ( src.valueOf() ) {
|
|
src.save();
|
|
|
|
// link_ref -- this goes first because it shouldn't trigger a linebreak
|
|
if ( (m = re_link_ref.exec( src )) ) {
|
|
src.advance( m[0] );
|
|
link_refs[ m[1] ] = m[2];
|
|
continue;
|
|
}
|
|
|
|
// add linebreak
|
|
list.linebreak();
|
|
|
|
// named block
|
|
if ( (m = re_block.exec( src )) ) {
|
|
src.advance( m[0] );
|
|
var block_type = m[0]
|
|
, pba = parse_attr( src, block_type )
|
|
;
|
|
if ( pba ) {
|
|
src.advance( pba[0] );
|
|
pba = pba[1];
|
|
}
|
|
if ( (m = /\.(\.?)(?:\s|(?=:))/.exec( src )) ) {
|
|
// FIXME: this whole copy_pba seems rather strange?
|
|
// slurp rest of block
|
|
var extended = !!m[1];
|
|
m = ( extended ? re_block_extended : re_block_normal ).exec( src.advance( m[0] ) );
|
|
src.advance( m[0] );
|
|
// bq | bc | notextile | pre | h# | fn# | p | ###
|
|
if ( block_type === 'bq' ) {
|
|
var cite, inner = m[1];
|
|
if ( (m = /^:(\S+)\s+/.exec( inner )) ) {
|
|
if ( !pba ) { pba = {}; }
|
|
pba.cite = m[1];
|
|
inner = inner.slice( m[0].length );
|
|
}
|
|
// RedCloth adds all attr to both: this is bad because it produces duplicate IDs
|
|
list.add( [ 'blockquote', pba, '\n' ].concat(
|
|
paragraph( inner, 'p', copy_pba(pba, { 'cite':1, 'id':1 }), '\n' )
|
|
).concat(['\n']) );
|
|
}
|
|
else if ( block_type === 'bc' ) {
|
|
var sub_pba = ( pba ) ? copy_pba(pba, { 'id':1 }) : null;
|
|
list.add( [ 'pre', pba, ( sub_pba ? [ 'code', sub_pba, m[1] ] : [ 'code', m[1] ] ) ] );
|
|
}
|
|
else if ( block_type === 'notextile' ) {
|
|
list.merge( parse_html( m[1] ) );
|
|
}
|
|
else if ( block_type === '###' ) {
|
|
// ignore the insides
|
|
}
|
|
else if ( block_type === 'pre' ) {
|
|
// I disagree with RedCloth, but agree with PHP here:
|
|
// "pre(foo#bar).. line1\n\nline2" prevents multiline preformat blocks
|
|
// ...which seems like the whole point of having an extended pre block?
|
|
list.add( [ 'pre', pba, m[1] ] );
|
|
}
|
|
else if ( re_footnote_def.test( block_type ) ) { // footnote
|
|
// Need to be careful: RedCloth fails "fn1(foo#m). footnote" -- it confuses the ID
|
|
var fnid = block_type.replace( /\D+/g, '' );
|
|
if ( !pba ) { pba = {}; }
|
|
pba['class'] = ( pba['class'] ? pba['class'] + ' ' : '' ) + 'footnote';
|
|
pba['id'] = 'fn' + fnid;
|
|
list.add( [ "p", pba, [ 'a', { 'href': '#fnr' + fnid }, [ 'sup', fnid ] ], ' ' ].concat( parse_inline( m[1], options ) ) );
|
|
}
|
|
else { // heading | paragraph
|
|
list.merge( paragraph( m[1], block_type, pba, '\n' ) );
|
|
}
|
|
continue;
|
|
}
|
|
else {
|
|
src.load();
|
|
}
|
|
}
|
|
|
|
// HTML comment
|
|
if ( (m = re_html_comment.exec( src )) ) {
|
|
src.advance( m[0] + (/(?:\s*\n+)+/.exec( src ) || [])[0] );
|
|
list.add( [ '!', m[1] ] );
|
|
continue;
|
|
}
|
|
|
|
// block HTML
|
|
if ( (m = re_html_tag_block.exec( src )) ) {
|
|
var tag = m[1]
|
|
, single = m[3] || tag in html_singletons
|
|
, tail = m[4]
|
|
;
|
|
// Unsurprisingly, all Textile implementations I have tested have trouble parsing simple HTML:
|
|
//
|
|
// "<div>a\n<div>b\n</div>c\n</div>d"
|
|
//
|
|
// I simply match them here as there is no way anyone is using nested HTML today, or if they
|
|
// are, then this will at least output less broken HTML as redundant tags will get quoted.
|
|
|
|
// Is block tag? ...
|
|
if ( tag in allowed_blocktags ) {
|
|
src.advance( m[0] );
|
|
|
|
var element = [ tag ];
|
|
|
|
if ( m[2] ) {
|
|
element.push( parse_html_attr( m[2] ) );
|
|
}
|
|
|
|
if ( single ) { // single tag
|
|
// let us add the element and continue our quest...
|
|
list.add( element );
|
|
continue;
|
|
}
|
|
else { // block
|
|
|
|
// gulp up the rest of this block...
|
|
var re_end_tag = re.compile( "^(.*?)(\\s*)(</" + tag + "\\s*>)(\\s*)", 's' );
|
|
if ( (m = re_end_tag.exec( src )) ) {
|
|
src.advance( m[0] );
|
|
if ( tag === 'pre' ) {
|
|
element.push( tail );
|
|
element = element.concat( parse_html( m[1].replace( /\n+$/, '' ), { 'code': 1 } ) );
|
|
if ( m[2] ) { element.push( m[2] ); }
|
|
list.add( element );
|
|
}
|
|
else if ( tag === 'notextile' ) {
|
|
element = parse_html( m[1].trim() );
|
|
list.merge( element );
|
|
}
|
|
else if ( tag === 'script' || tag === 'noscript' ) {
|
|
//element = parse_html( m[1].trim() );
|
|
element.push( tail + m[1] );
|
|
list.add( element );
|
|
}
|
|
else {
|
|
// These strange (and unnecessary) linebreak tests are here to get the
|
|
// tests working perfectly. In reality, this doesn't matter one bit.
|
|
if ( /\n/.test( tail ) ) { element.push( '\n' ); }
|
|
if ( /\n/.test( m[1] ) ) {
|
|
element = element.concat( parse_blocks( m[1], options ) );
|
|
}
|
|
else {
|
|
element = element.concat( parse_inline( m[1].replace( /^ +/, '' ), options ) );
|
|
}
|
|
if ( /\n/.test( m[2] ) ) { element.push( '\n' ); }
|
|
|
|
list.add( element );
|
|
}
|
|
continue;
|
|
}
|
|
/*else {
|
|
// end tag is missing, treat tag as normal text...
|
|
}*/
|
|
}
|
|
}
|
|
src.load();
|
|
}
|
|
|
|
// ruler
|
|
if ( (m = re_ruler.exec( src )) ) {
|
|
src.advance( m[0] );
|
|
list.add( [ 'hr' ] );
|
|
continue;
|
|
}
|
|
|
|
// list
|
|
if ( (m = re_list.exec( src )) ) {
|
|
src.advance( m[0] );
|
|
list.add( parse_list( m[0], options ) );
|
|
continue;
|
|
}
|
|
|
|
// table
|
|
if ( (m = re_table.exec( src )) ) {
|
|
src.advance( m[0] );
|
|
list.add( parse_table( m[1], options ) );
|
|
continue;
|
|
}
|
|
|
|
// paragraph
|
|
m = re_block_normal.exec( src );
|
|
list.merge( paragraph( m[1], 'p', undefined, "\n" ) );
|
|
src.advance( m[0] );
|
|
|
|
}
|
|
|
|
return list.get().map( fix_links, link_refs );
|
|
}
|
|
|
|
|
|
/*
 * Recursively walk a JsonML tree and replace the "href" of every link
 * element ('a') whose href is the name of a collected link reference with
 * the value stored under that name.
 *
 * The reference table is received as `this`, so the function can be handed
 * directly to Array#map with the table as thisArg:
 *
 *     list.map( fix_links, link_refs )
 *
 * @this    {Object} table mapping link reference names to their targets
 * @param   {*} jsonml  node to process; non-array values are returned untouched
 * @returns {*} the same `jsonml` value (attribute objects are updated in place)
 */
function fix_links ( jsonml ) {
  if ( !Array.isArray( jsonml ) ) { return jsonml; }

  if ( jsonml[0] === 'a' ) { // found a link
    var attr = jsonml[1];
    // `typeof null` is "object", so rule null out before probing it with `in`.
    // Only *own* keys of the table count as references: a plain `in` test
    // would also match inherited names such as "constructor" or "toString"
    // and swap the href for a function from Object.prototype.
    if ( attr && typeof attr === 'object' && 'href' in attr &&
         Object.prototype.hasOwnProperty.call( this, attr.href ) ) {
      attr.href = this[ attr.href ];
    }
  }

  // index 0 is the tag name; everything after it may be a child element
  for ( var i = 1, l = jsonml.length; i < l; i++ ) {
    if ( Array.isArray( jsonml[i] ) ) {
      fix_links.call( this, jsonml[i] );
    }
  }

  return jsonml;
}
|
|
|
|
|
|
|
|
/* exposed */
|
|
|
|
/*
 * Main entry point: parse `txt` into block-level JsonML nodes, serialize
 * each node with JSONML.toHTML and return the pieces joined into one string.
 *
 * @param   {String} txt    source text to convert
 * @param   {Object} [opt]  per-call options, layered over textile.defaults
 * @returns {String} the concatenated serialized output
 */
function textile ( txt, opt ) {
  // work on a throw-away options object: defaults first, then caller overrides
  var settings = merge( merge( {}, textile.defaults ), opt || {} );
  // parse, then serialize every top-level node
  var tree = parse_blocks( txt, settings );
  return tree.map( JSONML.toHTML ).join( '' );
}
|
|
// Default options; textile() and textile.jsonml() layer per-call options over these.
textile.defaults = {
  'breaks': true  // single-line linebreaks are converted to <br> by default
};
|
|
// Hands textile.defaults and `opt` to merge() (presumably copying opt's keys
// onto the shared defaults -- confirm against merge) and returns the receiver
// so calls can be chained. Exposed under both spellings.
textile.setoptions = function ( opt ) {
  merge( textile.defaults, opt );
  return this;
};
textile.setOptions = textile.setoptions;
|
|
|
|
|
|
// aliases for the main converter
textile.convert = textile;
textile.parse = textile;

// expose the HTML parser
textile.html_parser = parse_html;

// Like textile(), but returns the parsed tree instead of a serialized string:
// the block nodes are appended to a JsonML element whose tag name is 'html'.
textile.jsonml = function ( txt, opt ) {
  // work on a throw-away options object: defaults first, then caller overrides
  var settings = merge( merge( {}, textile.defaults ), opt || {} );
  var blocks = parse_blocks( txt, settings );
  return [ 'html' ].concat( blocks );
};

// expose the JsonML serializer
textile.serialize = JSONML.toHTML;
|
|
|
|
// Publish the API: assign to module.exports when a `module` object with a
// truthy `exports` exists, otherwise attach it to `this` as `textile`.
if ( typeof module === 'undefined' || !module.exports ) {
  this.textile = textile;
}
else {
  module.exports = textile;
}
|
|
|
|
|
|
}).call(function() {
|
|
return this || (typeof window !== 'undefined' ? window : global);
|
|
}());
|