#pike __REAL_VERSION__
#require constant(Regexp.PCRE)
//! This is a port of the JavaScript Markdown parser
//! @url{https://github.com/chjj/marked Marked@}. The only method you need
//! to use is @[parse()], which transforms Markdown text to HTML.
//!
//! For a description of Markdown, see the web page of its inventor,
//! @url{https://daringfireball.net/projects/markdown/@}.
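//!
//! A minimal usage sketch (assuming this module is mounted as
//! Tools.Markdown; adjust the module path to match your installation):
//!
//! @code
//! string html = Tools.Markdown.parse("# Hello\n\nSome *Markdown* text.",
//!                                    ([ "newline" : false ]));
//! write("%s\n", html);
//! @endcode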
import Regexp.PCRE;

#define nl() (options->newline ? "\n" : "")

// Cache for compiled regexps
private mapping(string:object) __re_cache = ([]);

// Returns the precompiled regexp PATTERN from the cache if it has been used
// before. Otherwise it compiles, caches and returns it.
#define REGX(PATTERN, ARGS...) \
  (__re_cache[PATTERN + #ARGS] || \
   (__re_cache[PATTERN + #ARGS] = R(PATTERN, ARGS)))

// Shortened regexp options
protected constant RM = OPTION.MULTILINE;
protected constant RI = OPTION.CASELESS;

//! Convert markdown @[md] to html
//!
//! @param options
//!   @mapping
//!     @member bool "gfm"
//!       Enable GitHub Flavored Markdown. (true)
//!     @member bool "tables"
//!       Enable GFM tables. Requires "gfm". (true)
//!     @member bool "breaks"
//!       Enable GFM "breaks". Requires "gfm". (false)
//!     @member bool "pedantic"
//!       Conform to obscure parts of markdown.pl as much as possible.
//!       Don't fix any of the original markdown bugs or poor behavior.
//!       (false)
//!     @member bool "sanitize"
//!       Sanitize the output. Ignore any HTML that has been input. (false)
//!     @member bool "mangle"
//!       Mangle (obfuscate) autolinked email addresses. (true)
//!     @member bool "smart_lists"
//!       Use smarter list behavior than the original markdown. (true)
//!     @member bool "smartypants"
//!       Use "smart" typographic punctuation for things like quotes and
//!       dashes. (false)
//!     @member string "header_prefix"
//!       Add a prefix to the ID attributes of header tags. (empty)
//!     @member bool "xhtml"
//!       Generate self-closing XHTML tags. (false)
//!     @member bool "newline"
//!       Add a newline after tags. If false the output will be on one line
//!       (well, newlines in text will be kept). (true)
//!     @member Renderer "renderer"
//!       Use this renderer to render the output (a usage sketch follows
//!       below). (Renderer)
//!   @endmapping
string parse(string md, void|mapping options)
{
  options = default_options + (options||([]));
  return Parser(options)->parse(Lexer(options)->lex(md));
}

protected typedef mapping(string:R|mapping(string:R)) RuleMap;

protected object noop = class {
  string source;
  array split2() { return 0; }
  string _sprintf(int t) { return "noop()"; }
}();

protected function make_regexp(R|string regex, void|int opts)
{
  regex = (string)regex;

  function(string,string,void|bool:function|R) fun, fun2;
  R rep = REGX("(^|[^[])\\^");

  // In most generated regexps we only want to replace the first occurrence
  // of `name` (and that's how replace in JavaScript works by default). But
  // in Pike replace() will replace all occurrences of `name` by default. We
  // work around that below, but in a few instances we want the global
  // replace() so we state that explicitly.
  fun = lambda (string name, R|string val, void|bool all) {
    if (!name || !sizeof(name)) {
      return R(regex, opts);
    }

    val = rep->replace((string)val, lambda (string a, string b) {
      return b;
    });

    if (all) {
      regex = replace(regex, name, val);
    }
    else {
      regex = replace1(regex, name, val);
    }

    return fun;
  };

  fun2 = fun;

  return fun;
}

//! Replaces the first occurrence of @[from] in @[subject] with @[to]
protected string replace1(string subject, string from, string to)
{
  int p1 = search(subject, from);

  if (p1 > -1) {
    int p2 = search(subject, from, p1+1);

    if (p2 > -1) {
      subject = replace(subject[0..p2-1], from, to) + subject[p2..];
    }
    else {
      subject = replace(subject, from, to);
    }
  }

  return subject;
}
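// A minimal sketch of how the "renderer" option documented at parse() above
// could be used: subclass Renderer, override the callbacks you want to
// customize, and pass the program in the options mapping. The class name
// MyRenderer is illustrative only. Note that Renderer is protected in this
// module, so a renderer defined outside of it cannot inherit Renderer and
// instead has to implement the needed callbacks (and a create() taking the
// options mapping) itself.
//
//   class MyRenderer
//   {
//     inherit Renderer;
//
//     // Emit headings without the id attribute the default renderer adds.
//     string heading(string text, int level, string raw)
//     {
//       return sprintf("<h%d>%s</h%d>%s", level, text, level, nl());
//     }
//   }
//
//   string html = parse(md, ([ "renderer" : MyRenderer ]));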
//! HTML encode @tt{<>"'@}. If @[enc] is @tt{true@} @tt{&@} will also be
//! encoded.
protected string encode_html(string html, void|bool enc)
{
  return replace(REGX((!enc ? "&(?!#?\\w+;)" : "&"))->replace(html, "&amp;"),
                 ([ "<"  : "&lt;",
                    ">"  : "&gt;",
                    "\"" : "&quot;",
                    "'"  : "&#39;" ]));
}

protected string decode_html(string html)
{
  return REGX("&([#\\w]+);")->replace(html, lambda (string a, string b) {
    b = lower_case(b);

    if (b == "colon") return ":";

    if (b[0] == '#') {
      // Numeric character reference: &#xHH; is hexadecimal, &#NN; decimal.
      int c;
      if (b[1] == 'x') sscanf(b[2..], "%x", c);
      else c = (int)b[1..];
      return sprintf("%c", c);
    }

    return "";
  });
}

protected constant default_options = ([
  "gfm"           : true,
  "tables"        : true,
  "breaks"        : false,
  "pedantic"      : false,
  "sanitize"      : false,
  "sanitizer"     : UNDEFINED,
  "mangle"        : true,
  "smart_lists"   : true,
  "silent"        : false,
  "highlight"     : UNDEFINED,
  "lang_prefix"   : "lang-",
  "smartypants"   : false,
  "header_prefix" : "",
  "renderer"      : UNDEFINED,
  "xhtml"         : false,
  "newline"       : true
]);

protected class R
{
  inherit Widestring : ws;

  protected string src;

  protected void create(R|string re, void|int options)
  {
    src = (string)re;
    ws::create(src, options);
  }

  string `source() { return src; }

  array(string) split_string(string subject)
  {
    array(array(int)) offsets = ({});

    ws::matchall(subject, lambda (array a, array b) {
      offsets += ({ b });
    });

    if (!sizeof(offsets)) {
      return 0;
    }

    offsets += ({ 0 });

    array(string) ret = ({ });
    array(int) c;
    int start, stop;

    for (int i; i < sizeof(offsets); i++) {
      c = offsets[i];
      stop = c && c[0] || sizeof(subject);
      ret += ({ subject[start .. stop] });

      if (!c) break;

      start = c[1];
    }

    return ret;
  }

  array(string) split_all(string subject)
  {
    array(string) ret = ({});

    ws::matchall(subject, lambda (array a, array b) {
      ret += ({ a[0] });
    });

    return ret;
  }

  string _sprintf(int t)
  {
    switch (t) {
      case 's': return source;
      case 'O': return "object(" + source + ")";
      default:
        error("Unhandled sprintf format (%%%c)! ", t);
        break;
    }
  }

  string cast(string how) { return src; }
}

private RuleMap block_rules;

RuleMap get_block_rules()
{
  if (block_rules) return block_rules;

  RuleMap block = ([
    "newline"    : R("^\\n+"),
    "code"       : R("^( {4}[^\\n]+\\n*)+"),
    "hr"         : R("^( *[-*_]){3,} *(?:\\n+|$)"),
    "heading"    : R("^ *(#{1,6}) *([^\\n]+?) *#* *(?:\\n+|$)"),
    "lheading"   : R("^([^\\n]+)\\n *(=|-){2,} *(?:\\n+|$)"),
    "blockquote" : R("^( *>[^\\n]+(\\n(?!def)[^\\n]+)*\\n*)+"),
    "list"       : R("^( *)(bull) [\\s\\S]+?(?:hr|def|\\n{2,}(?! )(?!\\1bull )\\n*|\\s*$)"),
    "html"       : R("^ *(?:comment *(?:\\n|\\s*$)|closed *(?:\\n{2,}|\\s*$)|closing *(?:\\n{2,}|\\s*$))"),
    "def"        : R("^ *\\[([^\\]]+)\\]: *<?([^\\s>]+)>?(?: +[\"(]([^\\n]+)[\")])? *(?:\\n+|$)"),
    "paragraph"  : R("^((?:[^\\n]+\\n?(?!hr|heading|lheading|blockquote|tag|def))+)\\n*"),
    "text"       : R("^[^\\n]+"),
    "table"      : noop,
    "fences"     : noop,
    "nptable"    : noop,
  ]);

  block->bullet = R("(?:[*+-]|\\d+\\.)");

  block->item = make_regexp("^( *)(bull) [^\\n]*(?:\\n(?!\\1bull )[^\\n]*)*", RM)
    ("bull", block->bullet, true)
    ();

  block->list = make_regexp(block->list)
    ("bull", block->bullet, true)
    ("hr", "\\n+(?=\\1?(?:[-*_] *){3,}(?:\\n+|$))")
    ("def", "\\n+(?=" + block->def->source + ")")
    ();

  block->blockquote = make_regexp(block->blockquote)
    ("def", block->def)
    ();

  block->_tag = R("(?!(?:"
                  "a|em|strong|small|s|cite|q|dfn|abbr|data|time|code"
                  "|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo"
                  "|span|br|wbr|ins|del|img)\\b)\\w+(?!:/|[^\\w\\s@]*@)\\b");

  block->html = make_regexp(block->html)
    ("comment", "<!--[\\s\\S]*?-->")
    ("closed", "<(tag)[\\s\\S]+?</\\1>")
    ("closing", "<tag(?:\"[^\"]*\"|'[^']*'|[^'\">])*?>")
    ("tag", block->_tag, true)
    ();

  block->paragraph = make_regexp(block->paragraph)
    ("hr", block->hr)
    ("lheading", block->lheading)
    ("heading", block->heading)
    ("blockquote", block->blockquote)
    ("tag", "<" + (string)block->_tag, true)
    ("def", block->def)
    ();

  block->normal = copy_value(block);

  block->gfm = block->normal + ([
    "fences"    : R("^ *(`{3,}|~{3,})[ \\.]*(\\S+)? *\\n([\\s\\S]*?)\\s*\\1 *(?:\\n+|$)"),
    "paragraph" : R("^"),
    "heading"   : R("^ *(#{1,6}) +([^\\n]+?) *#* *(?:\\n+|$)")
  ]);

  block->gfm->paragraph = make_regexp(block->paragraph)
    ("(?!", "(?!" + replace1(block->gfm->fences->source, "\\1", "\\2") + "|" +
            replace1(block->list->source, "\\1", "\\3") + "|")
    ();

  block->tables = block->gfm + ([
    "nptable" : R("^ *(\\S.*\\|.*)\\n *([-:]+ *\\|[-| :]*)\\n((?:.*\\|.*(?:\\n|$))*)\\n*"),
    "table"   : R("^ *\\|(.+)\\n *\\|( *[-:]+[-| :]*)\\n((?: *\\|.*(?:\\n|$))*)\\n*")
  ]);

  return block_rules = block;
}

private RuleMap inline_rules;

RuleMap get_inline_rules()
{
  if (inline_rules) return inline_rules;

  RuleMap inl = ([
    "escape"   : R("^\\\\([\\\\`*{}\\[\\]()#+\\-.!_>])"),
    "autolink" : R("^<([^ >]+(@|:/)[^ >]+)>"),
    "tag"      : R("^<!--[\\s\\S]*?-->|^</?\\w+(?:\"[^\"]*\"|'[^']*'|[^'\">])*?>"),
    "link"     : R("^!?\\[(inside)\\]\\(href\\)"),
    "reflink"  : R("^!?\\[(inside)\\]\\s*\\[([^\\]]*)\\]"),
    "nolink"   : R("^!?\\[((?:\\[[^\\]]*\\]|[^\\[\\]])*)\\]"),
    "strong"   : R("^__([\\s\\S]+?)__(?!_)|^\\*\\*([\\s\\S]+?)\\*\\*(?!\\*)"),
    "em"       : R("^\\b_((?:[^_]|__)+?)_\\b|^\\*((?:\\*\\*|[\\s\\S])+?)\\*(?!\\*)"),
    "code"     : R("^(`+)\\s*([\\s\\S]*?[^`])\\s*\\1(?!`)"),
    "br"       : R("^ {2,}\\n(?!\\s*$)"),
    "text"     : R("^[\\s\\S]+?(?=[\\\\<!\\[_*`]| {2,}\\n|$)"),
    "del"      : noop,
    "url"      : noop
  ]);

  inl->_inside = R("(?:\\[[^\\]]*\\]|[^\\[\\]]|\\](?=[^\\[]*\\]))*");
  inl->_href = R("\\s*<?([\\s\\S]*?)>?(?:\\s+['\"]([\\s\\S]*?)['\"])?\\s*");

  inl->link = make_regexp(inl->link)
    ("inside", inl->_inside)
    ("href", inl->_href)
    ();

  inl->reflink = make_regexp(inl->reflink)
    ("inside", inl->_inside)
    ();

  inl->normal = copy_value(inl);

  inl->gfm = inl->normal + ([
    "escape" : make_regexp(inl->escape)("])", "~|])")(),
    "url"    : R("^(https?://[^\\s<]+[^<.,:;\"')\\]\\s])"),
    "del"    : R("^~~(?=\\S)([\\s\\S]*?\\S)~~"),
    "text"   : make_regexp(inl->text)
                 ("]|", "~]|")
                 ("|", "|https?://|")
                 ()
  ]);

  inl->breaks = inl->gfm + ([
    "br"   : make_regexp(inl->br)("{2,}", "*")(),
    "text" : make_regexp(inl->gfm->text)("{2,}", "*")()
  ]);

  return inline_rules = inl;
}

protected class Parser
{
  protected mapping options;
  protected array tokens = ({});
  protected mapping token;
  protected Renderer renderer;
  protected InlineLexer inline_lexer;

  protected void create(void|mapping opts)
  {
    options = opts || default_options;

    if (!options->renderer) options->renderer = Renderer;

    renderer = options->renderer = options->renderer(options);
  }

  private int pos = -1;

#define next() (token = tokens[++pos])
#define peek() tokens[pos + 1]

  string parse(Lexer src)
  {
    token = UNDEFINED;
    inline_lexer = InlineLexer(src->links, options);
    tokens = src->tokens;
    tokens += ({ 0 }); // sentinel

    String.Buffer out = String.Buffer();
    function add = out->add;

    while (next()) {
      add(tok());
    }

    return out->get();
  }

  protected string parse_text()
  {
    string body = token->text;

    while (peek()->type == "text") {
      body += (options->newline ? "\n" : " ") + (next()->text||"");
    }

    return inline_lexer->output(body);
  }

  protected string tok()
  {
    switch (token->type)
    {
      case "space":
        return "";

      case "hr":
        return renderer->hr();

      case "heading":
        return renderer->heading(inline_lexer->output(token->text),
                                 token->depth, token->text);

      case "code":
        return renderer->code(token->text, token->lang, token->escaped);

      case "table":
        string header = "", body = "", cell = "";
        array(string) row;

        for (int i; i < sizeof(token->header); i++) {
          cell += renderer->tablecell(
            inline_lexer->output(token->header[i]),
            ([ "header" : true, "align" : token->align[i] ])
          );
        }

        header += renderer->tablerow(cell);

        for (int i; i < sizeof(token->cells); i++) {
          row = token->cells[i];
          cell = "";

          for (int j; j < sizeof(row); j++) {
            cell += renderer->tablecell(
              inline_lexer->output(row[j]),
              ([ "header" : false, "align" : token->align[j] ])
            );
          }

          body += renderer->tablerow(cell);
        }

        return renderer->table(header, body);

      case "blockquote_start":
        body = "";

        while (next()->type != "blockquote_end") {
          body += tok();
        }

        return renderer->blockquote(body);

      case "list_start":
        bool ordered = token->ordered;
        body = "";

        while (next()->type != "list_end") {
          body += tok();
        }

        return renderer->list(body, ordered);

      case "list_item_start":
        body = "";

        while (next()->type != "list_item_end") {
          body += token->type == "text" ? parse_text() : tok();
        }

        return renderer->listitem(body);

      case "loose_item_start":
        body = "";

        while (next()->type != "list_item_end") {
          body += tok();
        }

        return renderer->listitem(body);

      case "html":
        string html = !token->pre && !options->pedantic
                      ? inline_lexer->output(token->text) : token->text;
        return renderer->html(html) + nl();

      case "paragraph":
        return renderer->paragraph(inline_lexer->output(token->text));

      case "text":
        return renderer->paragraph(parse_text());

      default:
        error("Unhandled token: %O\n", token);
        break;
    }
  }
}

#define SRC_SUBSTR() src = src[sizeof(cap[0]) .. ]
#define PUSH_TOKEN(T) _tokens += ({ T })

protected class Lexer
{
  protected array(mapping) _tokens = ({});
  protected mapping token_links = ([]);
  protected mapping options;
  protected RuleMap rules, block;

  mapping `links() { return token_links; }
  array(mapping) `tokens() { return _tokens; }

  protected void create(void|mapping opts)
  {
    options = opts || default_options;
    block = get_block_rules();
    rules = block->normal;

    if (options->gfm) {
      if (options->tables) {
        rules = get_block_rules()->tables;
      }
      else {
        rules = get_block_rules()->gfm;
      }
    }
  }

  this_program lex(string src)
  {
    src = replace(src, ([
      "\r\n"   : "\n",
      "\r"     : "\n",
      "\t"     : "    ",
      "\u00a0" : " ",
      "\u2424" : "\n"
    ]));

    tokenize(src, true);

    return this;
  }

  protected array tokenize(string src, bool|int(0..1) top, void|bool bq)
  {
    src = REGX("^ +$", RM)->replace(src, "");

    array(string) cap;
    bool next;

    while (src && sizeof(src)) {
      // Newline (the assignments in the if-tests below are intentional)
      if (cap = rules->newline->split2(src)) {
        SRC_SUBSTR();

        if (sizeof(cap[0]) > 1) {
          PUSH_TOKEN(([ "type" : "space" ]));
        }
      }

      // Code
      if (cap = rules->code->split2(src)) {
        SRC_SUBSTR();
        string tmp = REGX("^ {4}", RM)->replace(cap[0], "");

        PUSH_TOKEN(([
          "type" : "code",
          "text" : !options->pedantic ? REGX("\n+$")->replace(tmp, "") : tmp
        ]));

        continue;
      }

      // fences (gfm)
      if (cap = rules->fences->split2(src)) {
        SRC_SUBSTR();

        PUSH_TOKEN(([
          "type" : "code",
          "lang" : cap[2],
          "text" : has_index(cap, 3) && cap[3] || ""
        ]));

        continue;
      }

      // heading
      if (cap = rules->heading->split2(src)) {
        SRC_SUBSTR();

        PUSH_TOKEN(([
          "type"  : "heading",
          "depth" : sizeof(cap[1]),
          "text"  : cap[2]
        ]));

        continue;
      }

      // table no leading pipe (gfm)
      if (top && (cap = rules->nptable->split2(src))) {
        SRC_SUBSTR();

        mapping item = ([
          "type"   : "table",
          "header" : REGX(" *\\| *")->split_string(REGX("^ *| *\\| *$")
                                                   ->replace(cap[1], "")),
          "align"  : REGX(" *\\| *")->split_string(REGX("^ *|\\| *$")
                                                   ->replace(cap[2], "")),
          "cells"  : REGX("\\n$")->replace(cap[3], "") / "\n"
        ]);

        for (int i; i < sizeof(item->align); i++) {
          if (REGX("^ *-+: *$")->match(item->align[i])) {
            item->align[i] = "right";
          }
          else if (REGX("^ *:-+: *$")->match(item->align[i])) {
            item->align[i] = "center";
          }
          else if (REGX("^ *:-+ *$")->match(item->align[i])) {
            item->align[i] = "left";
          }
          else {
            item->align[i] = UNDEFINED;
          }
        }

        for (int i; i < sizeof(item->cells); i++) {
          item->cells[i] = REGX(" *\\| *")->split_string(item->cells[i]);
        }

        PUSH_TOKEN(item);

        continue;
      }

      // lheading
      if (cap = rules->lheading->split2(src)) {
        SRC_SUBSTR();

        PUSH_TOKEN(([
          "type"  : "heading",
          "depth" : cap[2] == "=" ? 1 : 2,
          "text"  : cap[1]
        ]));

        continue;
      }

      // hr
      if (cap = rules->hr->split2(src)) {
        SRC_SUBSTR();
        PUSH_TOKEN(([ "type" : "hr" ]));
        continue;
      }

      // blockquote
      if (cap = rules->blockquote->split2(src)) {
        SRC_SUBSTR();
        PUSH_TOKEN(([ "type" : "blockquote_start" ]));

        string tmp = REGX("^ *> ?", RM)->replace(cap[0], "");

        // Pass `top` to keep the current "toplevel" state. This is exactly
        // how markdown.pl works.
        tokenize(tmp, top, true);

        PUSH_TOKEN(([ "type" : "blockquote_end" ]));

        continue;
      }

      // list
      if (cap = rules->list->split2(src)) {
        string bull = cap[2];
        SRC_SUBSTR();

        PUSH_TOKEN(([
          "type"    : "list_start",
          "ordered" : sizeof(bull) > 1
        ]));

        // Get each top-level item.
        cap = rules->item->split_all(cap[0]); //cap[0].match(rules->item);

        next = false;
        int l = sizeof(cap);
        int i = 0;

        for (; i < l; i++) {
          string item = cap[i];

          // Remove the list item's bullet so it is seen as the next token.
          int space = sizeof(item);
          item = REGX("^ *([*+-]|\\d+\\.) +")->replace(item, "");

          // Outdent whatever the list item contains. Hacky.
          if (search(item, "\n ") > -1) {
            space -= sizeof(item);
            item = !options->pedantic
                   ? REGX("^ {1," + space + "}", RM)->replace(item, "")
                   : REGX("^ {1,4}", RM)->replace(item, "");
          }

          // Determine whether the next list item belongs here.
          // Backpedal if it does not belong in this list.
          if (options->smart_lists && i != l - 1) {
            string b = block->bullet->split2(cap[i+1])[0];

            if (bull != b && !(sizeof(bull) > 1 && sizeof(b) > 1)) {
              src = (cap[i+1..] * "\n") + src;
              i = l - 1;
            }
          }

          // Determine whether item is loose or not.
          // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
          // for discount behavior.
          bool loose = next || REGX("\\n\\n(?!\\s*$)")->match(item);

          if (i != l - 1) {
            next = item[-1] == '\n';
            if (!loose) loose = next;
          }

          PUSH_TOKEN(([
            "type" : loose ? "loose_item_start" : "list_item_start"
          ]));

          // Recurse.
          tokenize(item, false, bq);

          PUSH_TOKEN(([ "type" : "list_item_end" ]));
        }

        PUSH_TOKEN(([ "type" : "list_end" ]));

        continue;
      }

      // html
      if (cap = rules->html->split2(src)) {
        SRC_SUBSTR();

        PUSH_TOKEN(([
          "type" : options->sanitize ? "paragraph" : "html",
          "pre"  : !options->sanitizer && (cap[1] == "pre" ||
                                           cap[1] == "script" ||
                                           cap[1] == "style"),
          "text" : cap[0]
        ]));

        continue;
      }

      // def
      if ((!bq && top) && (cap = rules->def->split2(src))) {
        SRC_SUBSTR();

        token_links[lower_case(cap[1])] = ([
          "href"  : cap[2],
          "title" : cap[3]
        ]);

        continue;
      }

      // table (gfm)
      if (top && (cap = rules->table->split2(src))) {
        SRC_SUBSTR();

        mapping item = ([
          "type"   : "table",
          "header" : REGX(" *\\| *")->split_string(REGX("^ *| *\\| *$")
                                                   ->replace(cap[1], "")),
          "align"  : REGX(" *\\| *")->split_string(REGX("^ *|\\| *$")
                                                   ->replace(cap[2], "")),
          "cells"  : REGX("(?: *\\| *)?\\n$")->replace(cap[3], "") / "\n"
        ]);

        for (int i; i < sizeof(item->align); i++) {
          if (REGX("^ *-+: *$")->match(item->align[i])) {
            item->align[i] = "right";
          }
          else if (REGX("^ *:-+: *$")->match(item->align[i])) {
            item->align[i] = "center";
          }
          else if (REGX("^ *:-+ *$")->match(item->align[i])) {
            item->align[i] = "left";
          }
          else {
            item->align[i] = UNDEFINED;
          }
        }

        for (int i; i < sizeof(item->cells); i++) {
          item->cells[i] = REGX(" *\\| *")->split_string(
            REGX("^ *\\| *| *\\| *$")->replace(item->cells[i], ""));
        }

        PUSH_TOKEN(item);

        continue;
      }

      // top-level paragraph
      if (top && (cap = rules->paragraph->split2(src))) {
        SRC_SUBSTR();

        PUSH_TOKEN(([
          "type" : "paragraph",
          "text" : (cap[1][-1] == '\n') ? cap[1][..<1] : cap[1]
        ]));

        continue;
      }

      // text
      if (cap = rules->text->split2(src)) {
        // Top-level should never reach here.
        SRC_SUBSTR();
        PUSH_TOKEN(([ "type" : "text", "text" : cap[0] ]));
        continue;
      }

      if (sizeof(src)) {
        string tmp = src;

        if (sizeof(tmp) > 30) {
          tmp = tmp[..30] + "[...]";
        }

        error("Infinite loop on: %O! ", tmp);
      }
    }

    return _tokens;
  }
}

protected class InlineLexer
{
  protected mapping options, rules, links;
  protected Renderer renderer;

  protected void create(mapping lnks, void|mapping opts)
  {
    options = opts || default_options;
    links = lnks;
    rules = get_inline_rules()->normal;
    renderer = options->renderer || Renderer;

    if (!links) {
      error("Tokens array requires a `links` property! ");
    }

    if (options->gfm) {
      if (options->breaks) {
        rules = get_inline_rules()->breaks;
      }
      else {
        rules = get_inline_rules()->gfm;
      }
    }
    else if (options->pedantic) {
      rules = get_inline_rules()->pedantic;
    }
  }

  private bool in_link = false;

  string output(string src)
  {
    String.Buffer buf = String.Buffer();
    function add = buf->add;
    string link, href, text;
    array cap;

    while (src && sizeof(src)) {
      // escape
      if (cap = rules->escape->split2(src)) {
        SRC_SUBSTR();
        add(cap[1]);
        continue;
      }

      // autolink
      if (cap = rules->autolink->split2(src)) {
        SRC_SUBSTR();

        if (cap[2] == "@") {
          text = cap[1][6] == ':' ? mangle(cap[1][7..]) : mangle(cap[1]);
          href = mangle("mailto:") + text;
        }
        else {
          text = encode_html(cap[1]);
          href = text;
        }

        add(renderer->link(href, UNDEFINED, text));

        continue;
      }

      // url (gfm)
      if (!in_link && (cap = rules->url->split2(src))) {
        SRC_SUBSTR();
        text = encode_html(cap[1]);
        href = text;
        add(renderer->link(href, UNDEFINED, text));
        continue;
      }

      // tag
      if (cap = rules->tag->split2(src)) {
        if (!in_link && REGX("^<a ", RI)->match(cap[0])) {
          in_link = true;
        }
        else if (in_link && REGX("^<\\/a>", RI)->match(cap[0])) {
          in_link = false;
        }

        SRC_SUBSTR();

        add(options->sanitize
            ? options->sanitizer
              ? options->sanitizer(cap[0])
              : encode_html(cap[0])
            : cap[0]);

        continue;
      }

      // link
      if (cap = rules->link->split2(src)) {
        SRC_SUBSTR();

        in_link = true;
        add(output_link(cap, ([ "href" : cap[2], "title" : cap[3] ])));
        in_link = false;

        continue;
      }

      // reflink, nolink
      if ((cap = rules->reflink->split2(src)) ||
          (cap = rules->nolink->split2(src)))
      {
        SRC_SUBSTR();

        link = sizeof(cap) > 2 && cap[2] ||
               REGX("\\s+")->replace(cap[1], " ");

        mapping lnk = links[lower_case(link)];

        if (!lnk || !lnk->href) {
          add(cap[0][0..0]);
          src = cap[0][1..] + src;
          continue;
        }

        in_link = true;
        add(output_link(cap, lnk));
        in_link = false;

        continue;
      }

      // strong
      if (cap = rules->strong->split2(src)) {
        SRC_SUBSTR();
        add(renderer->strong(output(cap[2] || cap[1])));
        continue;
      }

      // em
      if (cap = rules->em->split2(src)) {
        SRC_SUBSTR();
        add(renderer->em(output(cap[2] || cap[1])));
        continue;
      }

      // code
      if (cap = rules->code->split2(src)) {
        SRC_SUBSTR();
        add(renderer->codespan(encode_html(cap[2], true)));
        continue;
      }

      // br
      if (cap = rules->br->split2(src)) {
        SRC_SUBSTR();
        add(renderer->br());
        continue;
      }

      // del (gfm)
      if (cap = rules->del->split2(src)) {
        SRC_SUBSTR();
        add(renderer->del(output(cap[1])));
        continue;
      }

      // text
      if (cap = rules->text->split2(src)) {
        SRC_SUBSTR();
        add(renderer->text(encode_html(smartypants(cap[0]))));
        continue;
      }

      if (sizeof(src)) error("Infinite loop! ");
    }

    return buf->get();
  }

  protected string smartypants(string text)
  {
    if (!options->smartypants) return text;

    // em-dashes, en-dashes
    text = replace(text, ([ "---" : "&mdash;", "--" : "&ndash;" ]));

    // opening singles
    text = REGX("(^|[-—/(\\[{\"\\s])'")->replace(text,
      lambda (string a, string b) {
        return b + "&lsquo;";
      });

    // closing singles & apostrophes
    text = replace(text, "'", "&rsquo;");

    // opening doubles
    text = REGX("(^|[-—/(\\[{‘\\s])\"")->replace(text,
      lambda (string a, string b) {
        return b + "&ldquo;";
      });

    // closing doubles
    text = replace(text, "\"", "&rdquo;");

    // ellipses
    text = REGX("\\.{3}")->replace(text, "&hellip;");

    return text;
  }

  protected string output_link(array cap, mapping link)
  {
    string href = encode_html(link->href);
    string title = link->title ? encode_html(link->title) : UNDEFINED;

    return cap[0][0] != '!'
           ? renderer->link(href, title, output(cap[1]))
           : renderer->image(href, title, encode_html(cap[1]));
  }

  protected string mangle(string s)
  {
    if (!options->mangle) return s;

    string out = "";
    int l = sizeof(s), i = 0;
    string ch;

    for (; i < l; i++) {
      ch = s[i..i];

      if (random(1.0) > 0.5 || s[i] == '@') {
        ch = sprintf("&#x%x;", s[i]);
      }

      out += ch;
    }

    return out;
  }
}

protected class Renderer
{
  protected mapping options;

  protected void create(void|mapping opts)
  {
    options = opts;
  }

  string code(string code, string lang, bool escaped)
  {
    if (options->highlight) {
      string out = options->highlight(code, lang);

      if (out && out != code) {
        code = out;
        escaped = true;
      }
    }

    if (!lang) {
      return sprintf("<pre><code>%s%s</code></pre>%[1]s",
                     !escaped ? encode_html(code, true) : code, nl());
    }

    return sprintf(
      "<pre><code class='%s%s'>%s%s</code></pre>%[3]s",
      options->lang_prefix, encode_html(lang),
      !escaped ? encode_html(code, true) : code, nl()
    );
  }

  string blockquote(string text)
  {
    return sprintf("<blockquote>%s%s</blockquote>%[0]s", nl(), text);
  }

  string hr()
  {
    return options->xhtml ? "<hr/>" + nl() : "<hr>" + nl();
  }

  string heading(string text, int level, string raw)
  {
    return "<h" + level + " id='" + options->header_prefix +
           lower_case(REGX("[^\\w]+")->replace(raw, "-")) + "'>" +
           text + "</h" + level + ">" + nl();
  }

  string list(string body, void|bool ordered)
  {
    return sprintf("<%s>%s%s</%[0]s>%[1]s", ordered ? "ol" : "ul", nl(), body);
  }

  string listitem(string text)
  {
    return sprintf("<li>%s</li>%s", text, nl());
  }

  string paragraph(string text)
  {
    return sprintf("<p>%s</p>%s", text, nl());
  }

  string table(string header, string body)
  {
    return sprintf(
      "<table>%s<thead>%[0]s%s</thead>%[0]s"
      "<tbody>%[0]s%s</tbody>%[0]s</table>%[0]s",
      nl(), header, body
    );
  }

  string tablerow(string row)
  {
    return sprintf("<tr>%s%s</tr>%[0]s", nl(), row);
  }

  string tablecell(string cell, mapping flags)
  {
    return sprintf(
      "<%s%s>%s</%[0]s>%s",
      flags->header ? "th" : "td",
      flags->align ? " style='text-align:" + flags->align + "'" : "",
      cell, nl()
    );
  }

  string html(string text)
  {
    return text;
  }

  string text(string t)
  {
    return t;
  }

  string strong(string t)
  {
    return sprintf("<strong>%s</strong>", t);
  }

  string em(string t)
  {
    return sprintf("<em>%s</em>", t);
  }

  string del(string t)
  {
    return sprintf("<del>%s</del>", t);
  }

  string codespan(string t)
  {
    return sprintf("<code>%s</code>", t);
  }

  string br()
  {
    return options->xhtml ? "<br/>" : "<br>";
  }

  string link(string href, string title, string text)
  {
    if (options->sanitize) {
      mixed e = catch {
        string t = Protocols.HTTP.uri_decode(decode_html(href));
        t = REGX("[^\\w:]")->replace(t, "");

        if (has_prefix(t, "javascript:") || has_prefix(t, "vbscript:")) {
          return "";
        }
      };

      if (e) {
        return "";
      }
    }

    return sprintf(
      "<a href='%s'%s>%s</a>",
      href, title ? " title='" + title + "'" : "", text
    );
  }

  string image(string url, string title, string text)
  {
    string i = sprintf("<img src='%s' alt='%s'", url, text);

    if (title) i += sprintf(" title='%s'", title);

    return i + (options->xhtml ? "/>" : ">");
  }
}