/*** * Textile parser for JavaScript * * Copyright (c) 2012 Borgar Þorsteinsson (MIT License). * */ /*jshint laxcomma:true laxbreak:true eqnull:true loopfunc:true sub:true */ ;(function(){ "use strict"; /*** * Regular Expression helper methods * * This provides the `re` object, which contains several helper * methods for working with big regular expressions (soup). * */ var re = { _cache: {} , pattern: { 'punct': "[!-/:-@\\[\\\\\\]-`{-~]" , 'space': '\\s' } , escape: function ( src ) { return src.replace( /[\-\[\]\{\}\(\)\*\+\?\.\,\\\^\$\|\#\s]/g, "\\$&" ); } , collapse: function ( src ) { return src.replace( /(?:#.*?(?:\n|$))/g, '' ) .replace( /\s+/g, '' ) ; } , expand_patterns: function ( src ) { // TODO: provide escape for patterns: \[:pattern:] ? return src.replace( /\[\:\s*(\w+)\s*\:\]/g, function ( m, k ) { return ( k in re.pattern ) ? re.expand_patterns( re.pattern[ k ] ) : k ; }) ; } , isRegExp: function ( r ) { return Object.prototype.toString.call( r ) === "[object RegExp]"; } , compile: function ( src, flags ) { if ( re.isRegExp( src ) ) { if ( arguments.length === 1 ) { // no flags arg provided, use the RegExp one flags = ( src.global ? 'g' : '' ) + ( src.ignoreCase ? 'i' : '' ) + ( src.multiline ? 'm' : '' ); } src = src.source; } // don't do the same thing twice var ckey = src + ( flags || '' ); if ( ckey in re._cache ) { return re._cache[ ckey ]; } // allow classes var rx = re.expand_patterns( src ); // allow verbose expressions if ( flags && /x/.test( flags ) ) { rx = re.collapse( rx ); } // allow dotall expressions if ( flags && /s/.test( flags ) ) { rx = rx.replace( /([^\\])\./g, '$1[^\\0]' ); } // TODO: test if MSIE and add replace \s with [\s\u00a0] if it is? // clean flags and output new regexp flags = ( flags || '' ).replace( /[^gim]/g, '' ); return ( re._cache[ ckey ] = new RegExp( rx, flags ) ); } }; /*** * JSONML helper methods - http://www.jsonml.org/ * * This provides the `JSONML` object, which contains helper * methods for rendering JSONML to HTML. * * Note that the tag ! is taken to mean comment, this is however * not specified in the JSONML spec. * */ var JSONML = { escape: function ( text, esc_quotes ) { return text.replace( /&(?!(#\d{2,}|#x[\da-fA-F]{2,}|[a-zA-Z][a-zA-Z1-4]{1,6});)/g, "&" ) .replace( //g, ">" ) .replace( /"/g, esc_quotes ? """ : '"' ) .replace( /'/g, esc_quotes ? "'" : "'" ) ; } , toHTML: function ( jsonml ) { jsonml = jsonml.concat(); // basic case if ( typeof jsonml === "string" ) { return JSONML.escape( jsonml ); } var tag = jsonml.shift() , attributes = {} , content = [] , tag_attrs = "" , a ; if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !_isArray( jsonml[ 0 ] ) ) { attributes = jsonml.shift(); } while ( jsonml.length ) { content.push( JSONML.toHTML( jsonml.shift() ) ); } for ( a in attributes ) { tag_attrs += ( attributes[ a ] == null ) ? " " + a : " " + a + '="' + JSONML.escape( attributes[ a ], true ) + '"' ; } // be careful about adding whitespace here for inline elements if ( tag == "!" ) { return ""; } else if ( tag === "img" || tag === "br" || tag === "hr" || tag === "input" ) { return "<" + tag + tag_attrs + " />"; } else { return "<" + tag + tag_attrs + ">" + content.join( "" ) + ""; } } }; // merge object b properties into obect a function merge ( a, b ) { for ( var k in b ) { a[ k ] = b[ k ]; } return a; } var _isArray = Array.isArray || function ( a ) { return Object.prototype.toString.call(a) === '[object Array]'; }; /* expressions */ re.pattern[ 'blocks' ] = '(?:b[qc]|div|notextile|pre|h[1-6]|fn\\d+|p|###)'; re.pattern[ 'pba_class' ] = '\\([^\\)]+\\)'; re.pattern[ 'pba_style' ] = '\\{[^\\}]+\\}'; re.pattern[ 'pba_lang' ] = '\\[[^\\[\\]]+\\]'; re.pattern[ 'pba_align' ] = '(?:<>|<|>|=)'; re.pattern[ 'pba_pad' ] = '[\\(\\)]+'; re.pattern[ 'pba_attr' ] = '(?:[:pba_class:]|[:pba_style:]|[:pba_lang:]|[:pba_align:]|[:pba_pad:])*'; re.pattern[ 'url_punct' ] = '[.,«»″‹›!?]'; re.pattern[ 'html_id' ] = '[a-zA-Z][a-zA-Z\\d:]*'; re.pattern[ 'html_attr' ] = '(?:"[^"]+"|\'[^\']+\'|[^>\\s]+)'; re.pattern[ 'tx_urlch' ] = '[\\w"$\\-_.+!*\'(),";\\/?:@=&%#{}|\\\\^~\\[\\]`]'; re.pattern[ 'tx_cite' ] = ':((?:[^\\s()]|\\([^\\s()]+\\)|[()])+?)(?=[!-\\.:-@\\[\\\\\\]-`{-~]+(?:$|\\s)|$|\\s)'; re.pattern[ 'ucaps' ] = "A-Z"+ // Latin extended À-Þ "\u00c0-\u00d6\u00d8-\u00de"+ // Latin caps with embelishments and ligatures... "\u0100\u0102\u0104\u0106\u0108\u010a\u010c\u010e\u0110\u0112\u0114\u0116\u0118\u011a\u011c\u011e\u0120\u0122\u0124\u0126\u0128\u012a\u012c\u012e\u0130\u0132\u0134\u0136\u0139\u013b\u013d\u013f"+ "\u0141\u0143\u0145\u0147\u014a\u014c\u014e\u0150\u0152\u0154\u0156\u0158\u015a\u015c\u015e\u0160\u0162\u0164\u0166\u0168\u016a\u016c\u016e\u0170\u0172\u0174\u0176\u0178\u0179\u017b\u017d"+ "\u0181\u0182\u0184\u0186\u0187\u0189-\u018b\u018e-\u0191\u0193\u0194\u0196-\u0198\u019c\u019d\u019f\u01a0\u01a2\u01a4\u01a6\u01a7\u01a9\u01ac\u01ae\u01af\u01b1-\u01b3\u01b5\u01b7\u01b8\u01bc"+ "\u01c4\u01c7\u01ca\u01cd\u01cf\u01d1\u01d3\u01d5\u01d7\u01d9\u01db\u01de\u01e0\u01e2\u01e4\u01e6\u01e8\u01ea\u01ec\u01ee\u01f1\u01f4\u01f6-\u01f8\u01fa\u01fc\u01fe"+ "\u0200\u0202\u0204\u0206\u0208\u020a\u020c\u020e\u0210\u0212\u0214\u0216\u0218\u021a\u021c\u021e\u0220\u0222\u0224\u0226\u0228\u022a\u022c\u022e\u0230\u0232\u023a\u023b\u023d\u023e"+ "\u0241\u0243-\u0246\u0248\u024a\u024c\u024e"+ "\u1e00\u1e02\u1e04\u1e06\u1e08\u1e0a\u1e0c\u1e0e\u1e10\u1e12\u1e14\u1e16\u1e18\u1e1a\u1e1c\u1e1e\u1e20\u1e22\u1e24\u1e26\u1e28\u1e2a\u1e2c\u1e2e\u1e30\u1e32\u1e34\u1e36\u1e38\u1e3a\u1e3c\u1e3e\u1e40"+ "\u1e42\u1e44\u1e46\u1e48\u1e4a\u1e4c\u1e4e\u1e50\u1e52\u1e54\u1e56\u1e58\u1e5a\u1e5c\u1e5e\u1e60\u1e62\u1e64\u1e66\u1e68\u1e6a\u1e6c\u1e6e\u1e70\u1e72\u1e74\u1e76\u1e78\u1e7a\u1e7c\u1e7e"+ "\u1e80\u1e82\u1e84\u1e86\u1e88\u1e8a\u1e8c\u1e8e\u1e90\u1e92\u1e94\u1e9e\u1ea0\u1ea2\u1ea4\u1ea6\u1ea8\u1eaa\u1eac\u1eae\u1eb0\u1eb2\u1eb4\u1eb6\u1eb8\u1eba\u1ebc\u1ebe"+ "\u1ec0\u1ec2\u1ec4\u1ec6\u1ec8\u1eca\u1ecc\u1ece\u1ed0\u1ed2\u1ed4\u1ed6\u1ed8\u1eda\u1edc\u1ede\u1ee0\u1ee2\u1ee4\u1ee6\u1ee8\u1eea\u1eec\u1eee\u1ef0\u1ef2\u1ef4\u1ef6\u1ef8\u1efa\u1efc\u1efe"+ "\u2c60\u2c62-\u2c64\u2c67\u2c69\u2c6b\u2c6d-\u2c70\u2c72\u2c75\u2c7e\u2c7f"+ "\ua722\ua724\ua726\ua728\ua72a\ua72c\ua72e\ua732\ua734\ua736\ua738\ua73a\ua73c\ua73e"+ "\ua740\ua742\ua744\ua746\ua748\ua74a\ua74c\ua74e\ua750\ua752\ua754\ua756\ua758\ua75a\ua75c\ua75e\ua760\ua762\ua764\ua766\ua768\ua76a\ua76c\ua76e\ua779\ua77b\ua77d\ua77e"+ "\ua780\ua782\ua784\ua786\ua78b\ua78d\ua790\ua792\ua7a0\ua7a2\ua7a4\ua7a6\ua7a8\ua7aa"; var re_block = re.compile( /^([:blocks:])/ ) , re_block_se = re.compile( /^[:blocks:]$/ ) , re_block_normal = re.compile( /^(.*?)($|\n(?:\s*\n|$)+)/, 's' ) , re_block_extended = re.compile( /^(.*?)($|\n+(?=[:blocks:][:pba_attr:]\.))/, 's' ) , re_ruler = /^(\-\-\-+|\*\*\*+|___+)(\n\s+|$)/ , re_list = re.compile( /^((?:[\t ]*[\#\*]+[:pba_attr:] .+?(?:\n|$))+)(\s*\n)?/ ) , re_list_item = /^([\#\*]+)(.+?)(\n|$)/ , re_table = re.compile( /^((?:table[:pba_attr:]\.\n)?(?:(?:[:pba_attr:]\.[^\n\S]*)?\|.*?\|[^\n\S]*(?:\n|$))+)([^\n\S]*\n)?/, 's' ) , re_table_head = /^table(_?)([^\n]+)\.\s?\n/ , re_table_row = re.compile( /^([:pba_attr:]\.[^\n\S]*)?\|(.*?)\|[^\n\S]*(\n|$)/, 's' ) , re_fenced_phrase = /^\[(__?|\*\*?|\?\?|[\-\+\^~@%])([^\n]+)\1\]/ , re_phrase = /^([\[\{]?)(__?|\*\*?|\?\?|[\-\+\^~@%])/ , re_text = re.compile( /^.+?(?=[\\(\n*)/ ) , re_html_tag = re.compile( /^<([:html_id:])((?:\s[^=\s\/]+(?:\s*=\s*[:html_attr:])?)+)?\s*(\/?)>(\n*)/ ) , re_html_comment = re.compile( /^/, 's' ) , re_html_end_tag = re.compile( /^<\/([:html_id:])([^>]*)>/ ) , re_html_attr = re.compile( /^\s*([^=\s]+)(?:\s*=\s*("[^"]+"|'[^']+'|[^>\s]+))?/ ) , re_entity = /&(#\d\d{2,}|#x[\da-fA-F]{2,}|[a-zA-Z][a-zA-Z1-4]{1,6});/ // glyphs , re_dimsign = /([\d\.,]+['"]? ?)x( ?)(?=[\d\.,]['"]?)/g , re_emdash = /(^|[\s\w])--([\s\w]|$)/g , re_trademark = /(\b ?|\s|^)(?:\((?:TM|tm)\)|\[(?:TM|tm)\])/g , re_registered = /(\b ?|\s|^)(?:\(R\)|\[R\])/gi , re_copyright = /(\b ?|\s|^)(?:\(C\)|\[C\])/gi , re_apostrophe = /(\w)\'(\w)/g , re_double_prime = re.compile( /(\d*[\.,]?\d+)"(?=\s|$|[:punct:])/g ) , re_single_prime = re.compile( /(\d*[\.,]?\d+)'(?=\s|$|[:punct:])/g ) , re_closing_dquote = re.compile( /([^\s\[\(])"(?=$|\s|[:punct:])/g ) , re_closing_squote = re.compile( /([^\s\[\(])'(?=$|\s|[:punct:])/g ) // pba , re_pba_classid = /^\(([^\(\)\n]+)\)/ , re_pba_padding_l = /^([\(]+)/ , re_pba_padding_r = /^([\)]+)/ , re_pba_align_blk = /^(<>|<|>|=)/ , re_pba_align_img = /^(<|>|=)/ , re_pba_valign = /^(~|\^|\-)/ , re_pba_colspan = /^\\(\d+)/ , re_pba_rowspan = /^\/(\d+)/ , re_pba_styles = /^\{([^\}]*)\}/ , re_pba_css = /^\s*([^:\s]+)\s*:\s*(.+)\s*$/ , re_pba_lang = /^\[([^\[\]]+)\]/ ; var phrase_convert = { '*': 'strong' , '**': 'b' , '??': 'cite' , '_': 'em' , '__': 'i' , '-': 'del' , '%': 'span' , '+': 'ins' , '~': 'sub' , '^': 'sup' , '@': 'code' }; // area, base, basefont, bgsound, br, col, command, embed, frame, hr, // img, input, keygen, link, meta, param, source, track or wbr var html_singletons = { 'br': 1 , 'hr': 1 , 'img': 1 , 'link': 1 , 'meta': 1 , 'wbr': 1 , 'area': 1 , 'param': 1 , 'input': 1 , 'option': 1 , 'base': 1 }; var pba_align_lookup = { '<': 'left' , '=': 'center' , '>': 'right' , '<>': 'justify' }; var pba_valign_lookup = { '~':'bottom' , '^':'top' , '-':'middle' }; // HTML tags allowed in the document (root) level that trigger HTML parsing var allowed_blocktags = { 'p': 0 , 'hr': 0 , 'ul': 1 , 'ol': 0 , 'li': 0 , 'div': 1 , 'pre': 0 , 'object': 1 , 'script': 0 , 'noscript': 0 , 'blockquote': 1 , 'notextile': 1 }; function ribbon ( feed ) { var _slot = null , org = feed + '' , pos = 0 ; return { save: function () { _slot = pos; } , load: function () { pos = _slot; feed = org.slice( pos ); } , advance: function ( n ) { pos += ( typeof n === 'string' ) ? n.length : n; return ( feed = org.slice( pos ) ); } , lookbehind: function ( nchars ) { nchars = nchars == null ? 1 : nchars; return org.slice( pos - nchars, pos ); } , startsWith: function ( s ) { return feed.substring(0, s.length) === s; } , valueOf: function(){ return feed; } , toString: function(){ return feed; } }; } function builder ( arr ) { var _arr = _isArray( arr ) ? arr : []; return { add: function ( node ) { if ( typeof node === 'string' && typeof _arr[_arr.length - 1 ] === 'string' ) { // join if possible _arr[ _arr.length - 1 ] += node; } else if ( _isArray( node ) ) { var f = node.filter(function(s){ return s !== undefined; }); _arr.push( f ); } else if ( node ) { _arr.push( node ); } return this; } , merge: function ( s ) { for (var i=0,l=s.length; i

- user can still create nonsensical but "well-formed" markup function parse_html ( src, whitelist_tags ) { var org = src + '' , list = [] , root = list , _stack = [] , m , oktag = whitelist_tags ? function ( tag ) { return tag in whitelist_tags; } : function () { return true; } , tag ; src = (typeof src === 'string') ? ribbon( src ) : src; // loop do { if ( (m = re_html_comment.exec( src )) && oktag('!') ) { src.advance( m[0] ); list.push( [ '!', m[1] ] ); } // end tag else if ( (m = re_html_end_tag.exec( src )) && oktag(m[1]) ) { tag = m[1]; var junk = m[2]; if ( _stack.length ) { for (var i=_stack.length-1; i>=0; i--) { var head = _stack[i]; if ( head[0] === tag ) { _stack.splice( i ); list = _stack[ _stack.length - 1 ] || root; break; } } } src.advance( m[0] ); } // open/void tag else if ( (m = re_html_tag.exec( src )) && oktag(m[1]) ) { src.advance( m[0] ); tag = m[1]; var single = m[3] || m[1] in html_singletons , tail = m[4] , element = [ tag ] ; // attributes if ( m[2] ) { element.push( parse_html_attr( m[2] ) ); } // tag if ( single ) { // single tag // let us add the element and continue our quest... list.push( element ); if ( tail ) { list.push( tail ); } } else { // open tag if ( tail ) { element.push( tail ); } // TODO: some things auto close other things: ,

  • ,

    , // if ( tag === 'p' && _stack.length ) { // var seek = /^(p)$/; // for (var i=_stack.length-1; i>=0; i--) { // var head = _stack[i]; // if ( seek.test( head[0] ) /* === tag */ ) { // //src.advance( m[0] ); // _stack.splice( i ); // list = _stack[i] || root; // } // } // } // TODO: some elements can move parser into "text" mode // style, xmp, iframe, noembed, noframe, textarea, title, script, noscript, plaintext //if ( /^(script)$/.test( tag ) ) { } _stack.push( element ); list.push( element ); list = element; } } else { // no match, move by all "uninteresting" chars m = /([^<]+|[^\0])/.exec( src ); if ( m ) { list.push( m[0] ); } src.advance( m ? m[0].length || 1 : 1 ); } } while ( src.valueOf() ); return root; } /* attribute parser */ function parse_attr ( input, element, end_token ) { /* The attr bit causes massive problems for span elements when parens are used. Parens are a total mess and, unsurprisingly, causes trip ups: RC: `_{display:block}(span) span (span)_` -> `(span) span (span)` PHP: `_{display:block}(span) span (span)_` -> `(span) span (span)` PHP and RC seem to mostly solve this by not parsing a final attr parens on spans if the following character is a non-space. I've duplicated that: Class/ID is not matched on spans if it is followed by `end_token` or . */ input += ''; if ( !input || element === 'notextile' ) { return undefined; } var m , st = {} , o = { 'style': st } , remaining = input , is_block = element === 'table' || element === 'td' || re_block_se.test( element ) // "in" test would be better but what about fn#.? , is_img = element === 'img' , is_phrase = !is_block && !is_img && element !== 'a' , re_pba_align = ( is_img ) ? re_pba_align_img : re_pba_align_blk ; do { if ( (m = re_pba_styles.exec( remaining )) ) { m[1].split(';').forEach(function(p){ var d = p.match( re_pba_css ); if ( d ) { st[ d[1] ] = d[2]; } }); remaining = remaining.slice( m[0].length ); continue; } if ( (m = re_pba_lang.exec( remaining )) ) { o['lang'] = m[1]; remaining = remaining.slice( m[0].length ); continue; } if ( (m = re_pba_classid.exec( remaining )) ) { var rm = remaining.slice( m[0].length ); if ( ( !rm && is_phrase ) || ( end_token && (rm[0] === ' ' || end_token === rm.slice(0,end_token.length)) ) ) { m = null; continue; } var bits = m[1].split( '#' ); if ( bits[0] ) { o['class'] = bits[0]; } if ( bits[1] ) { o['id'] = bits[1]; } remaining = rm; continue; } if ( is_block ) { if ( (m = re_pba_padding_l.exec( remaining )) ) { st[ "padding-left" ] = ( m[1].length ) + "em"; remaining = remaining.slice( m[0].length ); continue; } if ( (m = re_pba_padding_r.exec( remaining )) ) { st[ "padding-right" ] = ( m[1].length ) + "em"; remaining = remaining.slice( m[0].length ); continue; } } // only for blocks: if ( is_img || is_block ) { if ( (m = re_pba_align.exec( remaining )) ) { var align = pba_align_lookup[ m[1] ]; if ( is_img ) { o[ 'align' ] = align; } else { st[ 'text-align' ] = align; } remaining = remaining.slice( m[0].length ); continue; } } // only for table cells if ( element === 'td' || element === 'tr' ) { if ( (m = re_pba_valign.exec( remaining )) ) { st[ "vertical-align" ] = pba_valign_lookup[ m[1] ]; remaining = remaining.slice( m[0].length ); continue; } } if ( element === 'td' ) { if ( (m = re_pba_colspan.exec( remaining )) ) { o[ "colspan" ] = m[1]; remaining = remaining.slice( m[0].length ); continue; } if ( (m = re_pba_rowspan.exec( remaining )) ) { o[ "rowspan" ] = m[1]; remaining = remaining.slice( m[0].length ); continue; } } } while ( m ); // collapse styles var s = []; for ( var v in st ) { s.push( v + ':' + st[v] ); } if ( s.length ) { o.style = s.join(';'); } else { delete o.style; } return remaining == input ? undefined : [ input.length - remaining.length, o ] ; } /* glyph parser */ function parse_glyphs ( src ) { if ( typeof src !== 'string' ) { return src; } // NB: order is important here ... return src // arrow .replace( /([^\-]|^)->/, '$1→' ) // arrow // dimensions .replace( re_dimsign, '$1×$2' ) // dimension sign // ellipsis .replace( /([^.]?)\.{3}/g, '$1…' ) // ellipsis // dashes .replace( re_emdash, '$1—$2' ) // em dash .replace( /( )-( )/g, '$1–$2' ) // en dash // legal marks .replace( re_trademark, '$1™' ) // trademark .replace( re_registered, '$1®' ) // registered .replace( re_copyright, '$1©' ) // copyright // double quotes .replace( re_double_prime, '$1″' ) // double prime .replace( re_closing_dquote, '$1”' ) // double closing quote .replace( /"/g, '“' ) // double opening quote // single quotes .replace( re_single_prime, '$1′' ) // single prime .replace( re_apostrophe, '$1’$2' ) // I'm an apostrophe .replace( re_closing_squote, '$1’' ) // single closing quote .replace( /'/g, '‘' ) ; } /* list parser */ function parse_list ( src, options ) { src = ribbon( src.replace( /(^|\n)[\t ]+/, '$1' ) ); var pad = function ( n ) { var s = '\n'; while ( n-- ) { s += '\t'; } return s; } , stack = [] , m , s ; while ( (m = re_list_item.exec( src )) ) { var item = [ 'li' ] , pba = parse_attr( m[2], 'li' ) ; if ( pba ) { m[2] = m[2].slice( pba[0] ); pba = pba[1]; } var dest_level = m[1].length , type = m[1].substr(-1) === '#' ? 'ol' : 'ul' , eqlev = stack.length === dest_level , new_li = null , lst , par , r ; // create nesting until we have correct level while ( stack.length < dest_level ) { lst = [ type, pad( stack.length + 1 ), (new_li = [ 'li' ]) ]; par = stack[ stack.length - 1 ]; if ( par ) { par.li.push( pad( stack.length ) ); par.li.push( lst ); } stack.push({ ul: lst, li: new_li }); } // remove nesting until we have correct level while ( stack.length > dest_level ) { r = stack.pop(); r.ul.push( pad( stack.length ) ); } par = stack[ stack.length - 1 ]; if ( !new_li ) { par.ul.push( pad( stack.length ), item ); par.li = item; } if ( pba ) { par.li.push( pba ); } Array.prototype.push.apply( par.li, parse_inline( m[2].trim(), options ) ); src.advance( m[0] ); } while ( stack.length ) { s = stack.pop(); s.ul.push( pad( stack.length ) ); } return s.ul; } /* table parser */ function parse_table ( src, options ) { src = ribbon( src.trim() ); var table = [ 'table' ] , row , inner , pba , more , m ; if ( (m = re_table_head.exec( src )) ) { // parse and apply table attr src.advance( m[0] ); pba = parse_attr( m[2], 'table' ); if ( pba ) { table.push( pba[1] ); } } while ( (m = re_table_row.exec( src )) ) { row = [ 'tr' ]; if ( m[1] && (pba = parse_attr( m[1], 'tr' )) ) { // FIXME: requires "\.\s?" -- else what ? row.push( pba[1] ); } table.push( '\n\t', row ); inner = ribbon( m[2] ); do { inner.save(); // cell loop var th = inner.startsWith( '_' ) , cell = [ th ? 'th' : 'td' ] ; if ( th ) { inner.advance( 1 ); } pba = parse_attr( inner, 'td' ); if ( pba ) { inner.advance( pba[0] ); cell.push( pba[1] ); // FIXME: don't do this if next text fails } if ( pba || th ) { var d = /^\.\s*/.exec( inner ); if ( d ) { inner.advance( d[0] ); } else { cell = [ 'td' ]; inner.load(); } } var mx = /^(==.*?==|[^\|])*/.exec( inner ); cell = cell.concat( parse_inline( mx[0], options ) ); row.push( '\n\t\t', cell ); more = inner.valueOf().charAt( mx[0].length ) === '|'; inner.advance( mx[0].length + 1 ); } while ( more ); row.push( '\n\t' ); src.advance( m[0] ); } table.push( '\n' ); return table; } /* inline parser */ function parse_inline ( src, options ) { src = ribbon( src ); var list = builder() , m , pba ; // loop do { src.save(); // linebreak -- having this first keeps it from messing to much with other phrases if ( src.startsWith( '\n' ) ) { src.advance( 1 ); if ( options.breaks ) { list.add( [ 'br' ] ); } list.add( '\n' ); continue; } // inline notextile if ( (m = /^==(.*?)==/.exec( src )) ) { src.advance( m[0] ); list.add( m[1] ); continue; } // lookbehind => /([\s>.,"'?!;:])$/ var behind = src.lookbehind( 1 ); var boundary = !behind || /^[\s>.,"'?!;:()]$/.test( behind ); // FIXME: need to test right boundary for phrases as well if ( (m = re_phrase.exec( src )) && ( boundary || m[1] ) ) { src.advance( m[0] ); var tok = m[2] , fence = m[1] , phrase_type = phrase_convert[ tok ] , code = phrase_type === 'code' ; if ( (pba = !code && parse_attr( src, phrase_type, tok )) ) { src.advance( pba[0] ); pba = pba[1]; } // FIXME: if we can't match the fence on the end, we should output fence-prefix as normal text // seek end var m_mid; var m_end; if ( fence === '[' ) { m_mid = '^(.*?)'; m_end = '(?:])'; } else if ( fence === '{' ) { m_mid = '^(.*?)'; m_end = '(?:})'; } else { var t1 = re.escape( tok.charAt(0) ); m_mid = ( code ) ? '^(\\S+|\\S+.*?\\S)' : '^([^\\s' + t1 + ']+|[^\\s' + t1 + '].*?\\S('+t1+'*))' ; m_end = '(?=$|[\\s.,"\'!?;:()«»„“”‚‘’])'; } var rx = re.compile( m_mid + '(' + re.escape( tok ) + ')' + m_end ); if ( (m = rx.exec( src )) && m[1] ) { src.advance( m[0] ); if ( code ) { list.add( [ phrase_type, m[1] ] ); } else { list.add( [ phrase_type, pba ].concat( parse_inline( m[1], options ) ) ); } continue; } // else src.load(); } // image if ( (m = re_image.exec( src )) || (m = re_image_fenced.exec( src )) ) { src.advance( m[0] ); pba = m[1] && parse_attr( m[1], 'img' ); var attr = pba ? pba[1] : { 'src':'' } , img = [ 'img', attr ] ; attr.src = m[2]; attr.alt = m[3] ? ( attr.title = m[3] ) : ''; if ( m[4] ) { // +cite causes image to be wraped with a link (or link_ref)? // TODO: support link_ref for image cite img = [ 'a', { 'href': m[4] }, img ]; } list.add( img ); continue; } // html comment if ( (m = re_html_comment.exec( src )) ) { src.advance( m[0] ); list.add( [ '!', m[1] ] ); continue; } // html tag // TODO: this seems to have a lot of overlap with block tags... DRY? if ( (m = re_html_tag.exec( src )) ) { src.advance( m[0] ); var tag = m[1] , single = m[3] || m[1] in html_singletons , element = [ tag ] , tail = m[4] ; if ( m[2] ) { element.push( parse_html_attr( m[2] ) ); } if ( single ) { // single tag list.add( element ).add( tail ); continue; } else { // need terminator // gulp up the rest of this block... var re_end_tag = re.compile( "^(.*?)()", 's' ); if ( (m = re_end_tag.exec( src )) ) { src.advance( m[0] ); if ( tag === 'code' ) { element.push( tail, m[1] ); } else if ( tag === 'notextile' ) { list.merge( parse_inline( m[1], options ) ); continue; } else { element = element.concat( parse_inline( m[1], options ) ); } list.add( element ); continue; } // end tag is missing, treat tag as normal text... } src.load(); } // footnote if ( (m = re_footnote.exec( src )) ) { src.advance( m[0] ); list.add( [ 'sup', { 'class': 'footnote', 'id': 'fnr' + m[1] }, [ 'a', { href: '#fn' + m[1] }, m[1] ] ] ); continue; } // caps / abbr if ( (m = re_caps.exec( src )) ) { src.advance( m[0] ); var caps = [ 'span', { 'class': 'caps' }, m[1] ]; if ( m[2] ) { caps = [ 'acronym', { 'title': m[2] }, caps ]; // FIXME: use , not acronym! } list.add( caps ); continue; } // links if ( (boundary && (m = re_link.exec( src ))) || (m = re_link_fenced.exec( src )) ) { src.advance( m[0].length ); var title = m[1].match( re_link_title ) , inner = ( title ) ? m[1].slice( 0, m[1].length - title[0].length ) : m[1] ; if ( (pba = parse_attr( inner, 'a' )) ) { inner = inner.slice( pba[0] ); pba = pba[1]; } else { pba = {}; } if ( title && !inner ) { inner = title[0]; title = ""; } pba.href = m[2]; if ( title ) { pba.title = title[1]; } list.add( [ 'a', pba ].concat( parse_inline( inner.replace( /^(\.?\s*)/, '' ), options ) ) ); continue; } // no match, move by all "uninteresting" chars m = /([a-zA-Z0-9,.':]+|\s+|[^\0])/.exec( src ); if ( m ) { list.add( m[0] ); } src.advance( m ? m[0].length || 1 : 1 ); } while ( src.valueOf() ); return list.get().map( parse_glyphs ); } /* block parser */ function parse_blocks ( src, options ) { var list = builder() , paragraph = function ( s, tag, pba, linebreak ) { tag = tag || 'p'; var out = []; s.split( /\n\n+/ ).forEach(function( bit, i ) { if ( tag === 'p' && /^\s/.test( bit ) ) { // no-paragraphs // WTF?: Why does Textile not allow linebreaks in spaced lines bit = bit.replace( /\n[\t ]/g, ' ' ).trim(); out = out.concat( parse_inline( bit, options ) ); } else { if ( linebreak && i ) { out.push( linebreak ); } out.push( pba ? [ tag, pba ].concat( parse_inline( bit, options ) ) : [ tag ].concat( parse_inline( bit, options ) ) ); } }); return out; } , link_refs = {} , m ; src = ribbon( src.replace( /^( *\n)+/, '' ) ); // loop while ( src.valueOf() ) { src.save(); // link_ref -- this goes first because it shouldn't trigger a linebreak if ( (m = re_link_ref.exec( src )) ) { src.advance( m[0] ); link_refs[ m[1] ] = m[2]; continue; } // add linebreak list.linebreak(); // named block if ( (m = re_block.exec( src )) ) { src.advance( m[0] ); var block_type = m[0] , pba = parse_attr( src, block_type ) ; if ( pba ) { src.advance( pba[0] ); pba = pba[1]; } if ( (m = /\.(\.?)(?:\s|(?=:))/.exec( src )) ) { // FIXME: this whole copy_pba seems rather strange? // slurp rest of block var extended = !!m[1]; m = ( extended ? re_block_extended : re_block_normal ).exec( src.advance( m[0] ) ); src.advance( m[0] ); // bq | bc | notextile | pre | h# | fn# | p | ### if ( block_type === 'bq' ) { var cite, inner = m[1]; if ( (m = /^:(\S+)\s+/.exec( inner )) ) { if ( !pba ) { pba = {}; } pba.cite = m[1]; inner = inner.slice( m[0].length ); } // RedCloth adds all attr to both: this is bad because it produces duplicate IDs list.add( [ 'blockquote', pba, '\n' ].concat( paragraph( inner, 'p', copy_pba(pba, { 'cite':1, 'id':1 }), '\n' ) ).concat(['\n']) ); } else if ( block_type === 'bc' ) { var sub_pba = ( pba ) ? copy_pba(pba, { 'id':1 }) : null; list.add( [ 'pre', pba, ( sub_pba ? [ 'code', sub_pba, m[1] ] : [ 'code', m[1] ] ) ] ); } else if ( block_type === 'notextile' ) { list.merge( parse_html( m[1] ) ); } else if ( block_type === '###' ) { // ignore the insides } else if ( block_type === 'pre' ) { // I disagree with RedCloth, but agree with PHP here: // "pre(foo#bar).. line1\n\nline2" prevents multiline preformat blocks // ...which seems like the whole point of having an extended pre block? list.add( [ 'pre', pba, m[1] ] ); } else if ( re_footnote_def.test( block_type ) ) { // footnote // Need to be careful: RedCloth fails "fn1(foo#m). footnote" -- it confuses the ID var fnid = block_type.replace( /\D+/g, '' ); if ( !pba ) { pba = {}; } pba['class'] = ( pba['class'] ? pba['class'] + ' ' : '' ) + 'footnote'; pba['id'] = 'fn' + fnid; list.add( [ "p", pba, [ 'a', { 'href': '#fnr' + fnid }, [ 'sup', fnid ] ], ' ' ].concat( parse_inline( m[1], options ) ) ); } else { // heading | paragraph list.merge( paragraph( m[1], block_type, pba, '\n' ) ); } continue; } else { src.load(); } } // HTML comment if ( (m = re_html_comment.exec( src )) ) { src.advance( m[0] + (/(?:\s*\n+)+/.exec( src ) || [])[0] ); list.add( [ '!', m[1] ] ); continue; } // block HTML if ( (m = re_html_tag_block.exec( src )) ) { var tag = m[1] , single = m[3] || tag in html_singletons , tail = m[4] ; // Unsurprisingly, all Textile implementations I have tested have trouble parsing simple HTML: // // "
    a\n
    b\n
    c\n
    d" // // I simply match them here as there is no way anyone is using nested HTML today, or if they // are, then this will at least output less broken HTML as redundant tags will get quoted. // Is block tag? ... if ( tag in allowed_blocktags ) { src.advance( m[0] ); var element = [ tag ]; if ( m[2] ) { element.push( parse_html_attr( m[2] ) ); } if ( single ) { // single tag // let us add the element and continue our quest... list.add( element ); continue; } else { // block // gulp up the rest of this block... var re_end_tag = re.compile( "^(.*?)(\\s*)()(\\s*)", 's' ); if ( (m = re_end_tag.exec( src )) ) { src.advance( m[0] ); if ( tag === 'pre' ) { element.push( tail ); element = element.concat( parse_html( m[1].replace( /\n+$/, '' ), { 'code': 1 } ) ); if ( m[2] ) { element.push( m[2] ); } list.add( element ); } else if ( tag === 'notextile' ) { element = parse_html( m[1].trim() ); list.merge( element ); } else if ( tag === 'script' || tag === 'noscript' ) { //element = parse_html( m[1].trim() ); element.push( tail + m[1] ); list.add( element ); } else { // These strange (and unnecessary) linebreak tests are here to get the // tests working perfectly. In reality, this doesn't matter one bit. if ( /\n/.test( tail ) ) { element.push( '\n' ); } if ( /\n/.test( m[1] ) ) { element = element.concat( parse_blocks( m[1], options ) ); } else { element = element.concat( parse_inline( m[1].replace( /^ +/, '' ), options ) ); } if ( /\n/.test( m[2] ) ) { element.push( '\n' ); } list.add( element ); } continue; } /*else { // end tag is missing, treat tag as normal text... }*/ } } src.load(); } // ruler if ( (m = re_ruler.exec( src )) ) { src.advance( m[0] ); list.add( [ 'hr' ] ); continue; } // list if ( (m = re_list.exec( src )) ) { src.advance( m[0] ); list.add( parse_list( m[0], options ) ); continue; } // table if ( (m = re_table.exec( src )) ) { src.advance( m[0] ); list.add( parse_table( m[1], options ) ); continue; } // paragraph m = re_block_normal.exec( src ); list.merge( paragraph( m[1], 'p', undefined, "\n" ) ); src.advance( m[0] ); } return list.get().map( fix_links, link_refs ); } // recurse the tree and swap out any "href" attributes function fix_links ( jsonml ) { if ( _isArray( jsonml ) ) { if ( jsonml[0] === 'a' ) { // found a link var attr = jsonml[1]; if ( typeof attr === "object" && 'href' in attr && attr.href in this ) { attr.href = this[ attr.href ]; } } for (var i=1,l=jsonml.length; i by default }; textile.setOptions = textile.setoptions = function ( opt ) { merge( textile.defaults, opt ); return this; }; textile.parse = textile.convert = textile; textile.html_parser = parse_html; textile.jsonml = function ( txt, opt ) { // get a throw-away copy of options opt = merge( merge( {}, textile.defaults ), opt || {} ); // parse and return tree return [ 'html' ].concat( parse_blocks( txt, opt ) ); }; textile.serialize = JSONML.toHTML; if ( typeof module !== 'undefined' && module.exports ) { module.exports = textile; } else { this.textile = textile; } }).call(function() { return this || (typeof window !== 'undefined' ? window : global); }());