# coding: utf-8 """ mistune ~~~~~~~ The fastest markdown parser in pure Python with renderer feature. :copyright: (c) 2014 - 2018 by Hsiaoming Yang. """ import re import inspect __version__ = '0.8.4' __author__ = 'Hsiaoming Yang <me@lepture.com>' __all__ = [ 'BlockGrammar', 'BlockLexer', 'InlineGrammar', 'InlineLexer', 'Renderer', 'Markdown', 'markdown', 'escape', ] _key_pattern = re.compile(r'\s+') _nonalpha_pattern = re.compile(r'\W') _escape_pattern = re.compile(r'&(?!#?\w+;)') _newline_pattern = re.compile(r'\r\n|\r') _block_quote_leading_pattern = re.compile(r'^ *> ?', flags=re.M) _block_code_leading_pattern = re.compile(r'^ {4}', re.M) _inline_tags = [ 'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data', 'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark', 'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del', 'img', 'font', ] _pre_tags = ['pre', 'script', 'style'] _valid_end = r'(?!:/|[^\w\s@]*@)\b' _valid_attr = r'''\s*[a-zA-Z\-](?:\s*\=\s*(?:"[^"]*"|'[^']*'|[^\s'">]+))?''' _block_tag = r'(?!(?:%s)\b)\w+%s' % ('|'.join(_inline_tags), _valid_end) _scheme_blacklist = ('javascript:', 'vbscript:') def _pure_pattern(regex): pattern = regex.pattern if pattern.startswith('^'): pattern = pattern[1:] return pattern def _keyify(key): key = escape(key.lower(), quote=True) return _key_pattern.sub(' ', key) def escape(text, quote=False, smart_amp=True): """Replace special characters "&", "<" and ">" to HTML-safe sequences. The original cgi.escape will always escape "&", but you can control this one for a smart escape amp. :param quote: if set to True, " and ' will be escaped. :param smart_amp: if set to False, & will always be escaped. """ if smart_amp: text = _escape_pattern.sub('&', text) else: text = text.replace('&', '&') text = text.replace('<', '<') text = text.replace('>', '>') if quote: text = text.replace('"', '"') text = text.replace("'", ''') return text def escape_link(url): """Remove dangerous URL schemes like javascript: and escape afterwards.""" lower_url = url.lower().strip('\x00\x1a \n\r\t') for scheme in _scheme_blacklist: if re.sub(r'[^A-Za-z0-9\/:]+', '', lower_url).startswith(scheme): return '' return escape(url, quote=True, smart_amp=False) def preprocessing(text, tab=4): text = _newline_pattern.sub('\n', text) text = text.expandtabs(tab) text = text.replace('\u2424', '\n') pattern = re.compile(r'^ +$', re.M) return pattern.sub('', text) class BlockGrammar(object): """Grammars for block level tokens.""" def_links = re.compile( r'^ *\[([^^\]]+)\]: *' # [key]: r'<?([^\s>]+)>?' # <link> or link r'(?: +["(]([^\n]+)[")])? *(?:\n+|$)' ) def_footnotes = re.compile( r'^\[\^([^\]]+)\]: *(' r'[^\n]*(?:\n+|$)' # [^key]: r'(?: {1,}[^\n]*(?:\n+|$))*' r')' ) newline = re.compile(r'^\n+') block_code = re.compile(r'^( {4}[^\n]+\n*)+') fences = re.compile( r'^ *(`{3,}|~{3,}) *([^`\s]+)? *\n' # ```lang r'([\s\S]+?)\s*' r'\1 *(?:\n+|$)' # ``` ) hrule = re.compile(r'^ {0,3}[-*_](?: *[-*_]){2,} *(?:\n+|$)') heading = re.compile(r'^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)') lheading = re.compile(r'^([^\n]+)\n *(=|-)+ *(?:\n+|$)') block_quote = re.compile(r'^( *>[^\n]+(\n[^\n]+)*\n*)+') list_block = re.compile( r'^( *)(?=[*+-]|\d+\.)(([*+-])?(?:\d+\.)?) [\s\S]+?' r'(?:' r'\n+(?=\1?(?:[-*_] *){3,}(?:\n+|$))' # hrule r'|\n+(?=%s)' # def links r'|\n+(?=%s)' # def footnotes\ r'|\n+(?=\1(?(3)\d+\.|[*+-]) )' # heterogeneous bullet r'|\n{2,}' r'(?! )' r'(?!\1(?:[*+-]|\d+\.) )\n*' r'|' r'\s*$)' % ( _pure_pattern(def_links), _pure_pattern(def_footnotes), ) ) list_item = re.compile( r'^(( *)(?:[*+-]|\d+\.) [^\n]*' r'(?:\n(?!\2(?:[*+-]|\d+\.) )[^\n]*)*)', flags=re.M ) list_bullet = re.compile(r'^ *(?:[*+-]|\d+\.) +') paragraph = re.compile( r'^((?:[^\n]+\n?(?!' r'%s|%s|%s|%s|%s|%s|%s|%s|%s' r'))+)\n*' % ( _pure_pattern(fences).replace(r'\1', r'\2'), _pure_pattern(list_block).replace(r'\1', r'\3'), _pure_pattern(hrule), _pure_pattern(heading), _pure_pattern(lheading), _pure_pattern(block_quote), _pure_pattern(def_links), _pure_pattern(def_footnotes), '<' + _block_tag, ) ) block_html = re.compile( r'^ *(?:%s|%s|%s) *(?:\n{2,}|\s*$)' % ( r'<!--[\s\S]*?-->', r'<(%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_block_tag, _valid_attr), r'<%s(?:%s)*?\s*\/?>' % (_block_tag, _valid_attr), ) ) table = re.compile( r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*' ) nptable = re.compile( r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*' ) text = re.compile(r'^[^\n]+') class BlockLexer(object): """Block level lexer for block grammars.""" grammar_class = BlockGrammar default_rules = [ 'newline', 'hrule', 'block_code', 'fences', 'heading', 'nptable', 'lheading', 'block_quote', 'list_block', 'block_html', 'def_links', 'def_footnotes', 'table', 'paragraph', 'text' ] list_rules = ( 'newline', 'block_code', 'fences', 'lheading', 'hrule', 'block_quote', 'list_block', 'block_html', 'text', ) footnote_rules = ( 'newline', 'block_code', 'fences', 'heading', 'nptable', 'lheading', 'hrule', 'block_quote', 'list_block', 'block_html', 'table', 'paragraph', 'text' ) def __init__(self, rules=None, **kwargs): self.tokens = [] self.def_links = {} self.def_footnotes = {} if not rules: rules = self.grammar_class() self.rules = rules self._max_recursive_depth = kwargs.get('max_recursive_depth', 6) self._list_depth = 0 self._blockquote_depth = 0 def __call__(self, text, rules=None): return self.parse(text, rules) def parse(self, text, rules=None): text = text.rstrip('\n') if not rules: rules = self.default_rules def manipulate(text): for key in rules: rule = getattr(self.rules, key) m = rule.match(text) if not m: continue getattr(self, 'parse_%s' % key)(m) return m return False # pragma: no cover while text: m = manipulate(text) if m is not False: text = text[len(m.group(0)):] continue if text: # pragma: no cover raise RuntimeError('Infinite loop at: %s' % text) return self.tokens def parse_newline(self, m): length = len(m.group(0)) if length > 1: self.tokens.append({'type': 'newline'}) def parse_block_code(self, m): # clean leading whitespace code = _block_code_leading_pattern.sub('', m.group(0)) self.tokens.append({ 'type': 'code', 'lang': None, 'text': code, }) def parse_fences(self, m): self.tokens.append({ 'type': 'code', 'lang': m.group(2), 'text': m.group(3), }) def parse_heading(self, m): self.tokens.append({ 'type': 'heading', 'level': len(m.group(1)), 'text': m.group(2), }) def parse_lheading(self, m): """Parse setext heading.""" self.tokens.append({ 'type': 'heading', 'level': 1 if m.group(2) == '=' else 2, 'text': m.group(1), }) def parse_hrule(self, m): self.tokens.append({'type': 'hrule'}) def parse_list_block(self, m): bull = m.group(2) self.tokens.append({ 'type': 'list_start', 'ordered': '.' in bull, }) self._list_depth += 1 if self._list_depth > self._max_recursive_depth: self.tokens.append({'type': 'list_item_start'}) self.parse_text(m) self.tokens.append({'type': 'list_item_end'}) else: cap = m.group(0) self._process_list_item(cap, bull) self.tokens.append({'type': 'list_end'}) self._list_depth -= 1 def _process_list_item(self, cap, bull): cap = self.rules.list_item.findall(cap) _next = False length = len(cap) for i in range(length): item = cap[i][0] # remove the bullet space = len(item) item = self.rules.list_bullet.sub('', item) # outdent if '\n ' in item: space = space - len(item) pattern = re.compile(r'^ {1,%d}' % space, flags=re.M) item = pattern.sub('', item) # determine whether item is loose or not loose = _next if not loose and re.search(r'\n\n(?!\s*$)', item): loose = True rest = len(item) if i != length - 1 and rest: _next = item[rest-1] == '\n' if not loose: loose = _next if loose: t = 'loose_item_start' else: t = 'list_item_start' self.tokens.append({'type': t}) # recurse self.parse(item, self.list_rules) self.tokens.append({'type': 'list_item_end'}) def parse_block_quote(self, m): self.tokens.append({'type': 'block_quote_start'}) self._blockquote_depth += 1 if self._blockquote_depth > self._max_recursive_depth: self.parse_text(m) else: # clean leading > cap = _block_quote_leading_pattern.sub('', m.group(0)) self.parse(cap) self.tokens.append({'type': 'block_quote_end'}) self._blockquote_depth -= 1 def parse_def_links(self, m): key = _keyify(m.group(1)) self.def_links[key] = { 'link': m.group(2), 'title': m.group(3), } def parse_def_footnotes(self, m): key = _keyify(m.group(1)) if key in self.def_footnotes: # footnote is already defined return self.def_footnotes[key] = 0 self.tokens.append({ 'type': 'footnote_start', 'key': key, }) text = m.group(2) if '\n' in text: lines = text.split('\n') whitespace = None for line in lines[1:]: space = len(line) - len(line.lstrip()) if space and (not whitespace or space < whitespace): whitespace = space newlines = [lines[0]] for line in lines[1:]: newlines.append(line[whitespace:]) text = '\n'.join(newlines) self.parse(text, self.footnote_rules) self.tokens.append({ 'type': 'footnote_end', 'key': key, }) def parse_table(self, m): item = self._process_table(m) cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3)) cells = cells.split('\n') for i, v in enumerate(cells): v = re.sub(r'^ *\| *| *\| *$', '', v) cells[i] = re.split(r' *(?<!\\)\| *', v) item['cells'] = self._process_cells(cells) self.tokens.append(item) def parse_nptable(self, m): item = self._process_table(m) cells = re.sub(r'\n$', '', m.group(3)) cells = cells.split('\n') for i, v in enumerate(cells): cells[i] = re.split(r' *(?<!\\)\| *', v) item['cells'] = self._process_cells(cells) self.tokens.append(item) def _process_table(self, m): header = re.sub(r'^ *| *\| *$', '', m.group(1)) header = re.split(r' *\| *', header) align = re.sub(r' *|\| *$', '', m.group(2)) align = re.split(r' *\| *', align) for i, v in enumerate(align): if re.search(r'^ *-+: *$', v): align[i] = 'right' elif re.search(r'^ *:-+: *$', v): align[i] = 'center' elif re.search(r'^ *:-+ *$', v): align[i] = 'left' else: align[i] = None item = { 'type': 'table', 'header': header, 'align': align, } return item def _process_cells(self, cells): for i, line in enumerate(cells): for c, cell in enumerate(line): # de-escape any pipe inside the cell here cells[i][c] = re.sub('\\\\\|', '|', cell) return cells def parse_block_html(self, m): tag = m.group(1) if not tag: text = m.group(0) self.tokens.append({ 'type': 'close_html', 'text': text }) else: attr = m.group(2) text = m.group(3) self.tokens.append({ 'type': 'open_html', 'tag': tag, 'extra': attr, 'text': text }) def parse_paragraph(self, m): text = m.group(1).rstrip('\n') self.tokens.append({'type': 'paragraph', 'text': text}) def parse_text(self, m): text = m.group(0) self.tokens.append({'type': 'text', 'text': text}) class InlineGrammar(object): """Grammars for inline level tokens.""" escape = re.compile(r'^\\([\\`*{}\[\]()#+\-.!_>~|])') # \* \+ \! .... inline_html = re.compile( r'^(?:%s|%s|%s)' % ( r'<!--[\s\S]*?-->', r'<(\w+%s)((?:%s)*?)\s*>([\s\S]*?)<\/\1>' % ( _valid_end, _valid_attr), r'<\w+%s(?:%s)*?\s*\/?>' % (_valid_end, _valid_attr), ) ) autolink = re.compile(r'^<([^ >]+(@|:)[^ >]+)>') link = re.compile( r'^!?\[(' r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*' r')\]\(' r'''\s*(<)?([\s\S]*?)(?(2)>)(?:\s+['"]([\s\S]*?)['"])?\s*''' r'\)' ) reflink = re.compile( r'^!?\[(' r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*' r')\]\s*\[([^^\]]*)\]' ) nolink = re.compile(r'^!?\[((?:\[[^\]]*\]|[^\[\]])*)\]') url = re.compile(r'''^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])''') double_emphasis = re.compile( r'^_{2}([\s\S]+?)_{2}(?!_)' # __word__ r'|' r'^\*{2}([\s\S]+?)\*{2}(?!\*)' # **word** ) emphasis = re.compile( r'^\b_((?:__|[^_])+?)_\b' # _word_ r'|' r'^\*((?:\*\*|[^\*])+?)\*(?!\*)' # *word* ) code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)') # `code` linebreak = re.compile(r'^ {2,}\n(?!\s*$)') strikethrough = re.compile(r'^~~(?=\S)([\s\S]*?\S)~~') # ~~word~~ footnote = re.compile(r'^\[\^([^\]]+)\]') text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| {2,}\n|$)') def hard_wrap(self): """Grammar for hard wrap linebreak. You don't need to add two spaces at the end of a line. """ self.linebreak = re.compile(r'^ *\n(?!\s*$)') self.text = re.compile( r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| *\n|$)' ) class InlineLexer(object): """Inline level lexer for inline grammars.""" grammar_class = InlineGrammar default_rules = [ 'escape', 'inline_html', 'autolink', 'url', 'footnote', 'link', 'reflink', 'nolink', 'double_emphasis', 'emphasis', 'code', 'linebreak', 'strikethrough', 'text', ] inline_html_rules = [ 'escape', 'inline_html', 'autolink', 'url', 'link', 'reflink', 'nolink', 'double_emphasis', 'emphasis', 'code', 'linebreak', 'strikethrough', 'text', ] def __init__(self, renderer, rules=None, **kwargs): self.renderer = renderer self.links = {} self.footnotes = {} self.footnote_index = 0 if not rules: rules = self.grammar_class() kwargs.update(self.renderer.options) if kwargs.get('hard_wrap'): rules.hard_wrap() self.rules = rules self._in_link = False self._in_footnote = False self._parse_inline_html = kwargs.get('parse_inline_html') def __call__(self, text, rules=None): return self.output(text, rules) def setup(self, links, footnotes): self.footnote_index = 0 self.links = links or {} self.footnotes = footnotes or {} def output(self, text, rules=None): text = text.rstrip('\n') if not rules: rules = list(self.default_rules) if self._in_footnote and 'footnote' in rules: rules.remove('footnote') output = self.renderer.placeholder() def manipulate(text): for key in rules: pattern = getattr(self.rules, key) m = pattern.match(text) if not m: continue self.line_match = m out = getattr(self, 'output_%s' % key)(m) if out is not None: return m, out return False # pragma: no cover while text: ret = manipulate(text) if ret is not False: m, out = ret output += out text = text[len(m.group(0)):] continue if text: # pragma: no cover raise RuntimeError('Infinite loop at: %s' % text) return output def output_escape(self, m): text = m.group(1) return self.renderer.escape(text) def output_autolink(self, m): link = m.group(1) if m.group(2) == '@': is_email = True else: is_email = False return self.renderer.autolink(link, is_email) def output_url(self, m): link = m.group(1) if self._in_link: return self.renderer.text(link) return self.renderer.autolink(link, False) def output_inline_html(self, m): tag = m.group(1) if self._parse_inline_html and tag in _inline_tags: text = m.group(3) if tag == 'a': self._in_link = True text = self.output(text, rules=self.inline_html_rules) self._in_link = False else: text = self.output(text, rules=self.inline_html_rules) extra = m.group(2) or '' html = '<%s%s>%s</%s>' % (tag, extra, text, tag) else: html = m.group(0) return self.renderer.inline_html(html) def output_footnote(self, m): key = _keyify(m.group(1)) if key not in self.footnotes: return None if self.footnotes[key]: return None self.footnote_index += 1 self.footnotes[key] = self.footnote_index return self.renderer.footnote_ref(key, self.footnote_index) def output_link(self, m): return self._process_link(m, m.group(3), m.group(4)) def output_reflink(self, m): key = _keyify(m.group(2) or m.group(1)) if key not in self.links: return None ret = self.links[key] return self._process_link(m, ret['link'], ret['title']) def output_nolink(self, m): key = _keyify(m.group(1)) if key not in self.links: return None ret = self.links[key] return self._process_link(m, ret['link'], ret['title']) def _process_link(self, m, link, title=None): line = m.group(0) text = m.group(1) if line[0] == '!': return self.renderer.image(link, title, text) self._in_link = True text = self.output(text) self._in_link = False return self.renderer.link(link, title, text) def output_double_emphasis(self, m): text = m.group(2) or m.group(1) text = self.output(text) return self.renderer.double_emphasis(text) def output_emphasis(self, m): text = m.group(2) or m.group(1) text = self.output(text) return self.renderer.emphasis(text) def output_code(self, m): text = m.group(2) return self.renderer.codespan(text) def output_linebreak(self, m): return self.renderer.linebreak() def output_strikethrough(self, m): text = self.output(m.group(1)) return self.renderer.strikethrough(text) def output_text(self, m): text = m.group(0) return self.renderer.text(text) class Renderer(object): """The default HTML renderer for rendering Markdown. """ def __init__(self, **kwargs): self.options = kwargs def placeholder(self): """Returns the default, empty output value for the renderer. All renderer methods use the '+=' operator to append to this value. Default is a string so rendering HTML can build up a result string with the rendered Markdown. Can be overridden by Renderer subclasses to be types like an empty list, allowing the renderer to create a tree-like structure to represent the document (which can then be reprocessed later into a separate format like docx or pdf). """ return '' def block_code(self, code, lang=None): """Rendering block level code. ``pre > code``. :param code: text content of the code block. :param lang: language of the given code. """ code = code.rstrip('\n') if not lang: code = escape(code, smart_amp=False) return '<pre><code>%s\n</code></pre>\n' % code code = escape(code, quote=True, smart_amp=False) return '<pre><code class="lang-%s">%s\n</code></pre>\n' % (lang, code) def block_quote(self, text): """Rendering <blockquote> with the given text. :param text: text content of the blockquote. """ return '<blockquote>%s\n</blockquote>\n' % text.rstrip('\n') def block_html(self, html): """Rendering block level pure html content. :param html: text content of the html snippet. """ if self.options.get('skip_style') and \ html.lower().startswith('<style'): return '' if self.options.get('escape'): return escape(html) return html def header(self, text, level, raw=None): """Rendering header/heading tags like ``<h1>`` ``<h2>``. :param text: rendered text content for the header. :param level: a number for the header level, for example: 1. :param raw: raw text content of the header. """ return '<h%d>%s</h%d>\n' % (level, text, level) def hrule(self): """Rendering method for ``<hr>`` tag.""" if self.options.get('use_xhtml'): return '<hr />\n' return '<hr>\n' def list(self, body, ordered=True): """Rendering list tags like ``<ul>`` and ``<ol>``. :param body: body contents of the list. :param ordered: whether this list is ordered or not. """ tag = 'ul' if ordered: tag = 'ol' return '<%s>\n%s</%s>\n' % (tag, body, tag) def list_item(self, text): """Rendering list item snippet. Like ``<li>``.""" return '<li>%s</li>\n' % text def paragraph(self, text): """Rendering paragraph tags. Like ``<p>``.""" return '<p>%s</p>\n' % text.strip(' ') def table(self, header, body): """Rendering table element. Wrap header and body in it. :param header: header part of the table. :param body: body part of the table. """ return ( '<table>\n<thead>%s</thead>\n' '<tbody>\n%s</tbody>\n</table>\n' ) % (header, body) def table_row(self, content): """Rendering a table row. Like ``<tr>``. :param content: content of current table row. """ return '<tr>\n%s</tr>\n' % content def table_cell(self, content, **flags): """Rendering a table cell. Like ``<th>`` ``<td>``. :param content: content of current table cell. :param header: whether this is header or not. :param align: align of current table cell. """ if flags['header']: tag = 'th' else: tag = 'td' align = flags['align'] if not align: return '<%s>%s</%s>\n' % (tag, content, tag) return '<%s style="text-align:%s">%s</%s>\n' % ( tag, align, content, tag ) def double_emphasis(self, text): """Rendering **strong** text. :param text: text content for emphasis. """ return '<strong>%s</strong>' % text def emphasis(self, text): """Rendering *emphasis* text. :param text: text content for emphasis. """ return '<em>%s</em>' % text def codespan(self, text): """Rendering inline `code` text. :param text: text content for inline code. """ text = escape(text.rstrip(), smart_amp=False) return '<code>%s</code>' % text def linebreak(self): """Rendering line break like ``<br>``.""" if self.options.get('use_xhtml'): return '<br />\n' return '<br>\n' def strikethrough(self, text): """Rendering ~~strikethrough~~ text. :param text: text content for strikethrough. """ return '<del>%s</del>' % text def text(self, text): """Rendering unformatted text. :param text: text content. """ if self.options.get('parse_block_html'): return text return escape(text) def escape(self, text): """Rendering escape sequence. :param text: text content. """ return escape(text) def autolink(self, link, is_email=False): """Rendering a given link or email address. :param link: link content or email address. :param is_email: whether this is an email or not. """ text = link = escape_link(link) if is_email: link = 'mailto:%s' % link return '<a href="%s">%s</a>' % (link, text) def link(self, link, title, text): """Rendering a given link with content and title. :param link: href link for ``<a>`` tag. :param title: title content for `title` attribute. :param text: text content for description. """ link = escape_link(link) if not title: return '<a href="%s">%s</a>' % (link, text) title = escape(title, quote=True) return '<a href="%s" title="%s">%s</a>' % (link, title, text) def image(self, src, title, text): """Rendering a image with title and text. :param src: source link of the image. :param title: title text of the image. :param text: alt text of the image. """ src = escape_link(src) text = escape(text, quote=True) if title: title = escape(title, quote=True) html = '<img src="%s" alt="%s" title="%s"' % (src, text, title) else: html = '<img src="%s" alt="%s"' % (src, text) if self.options.get('use_xhtml'): return '%s />' % html return '%s>' % html def inline_html(self, html): """Rendering span level pure html content. :param html: text content of the html snippet. """ if self.options.get('escape'): return escape(html) return html def newline(self): """Rendering newline element.""" return '' def footnote_ref(self, key, index): """Rendering the ref anchor of a footnote. :param key: identity key for the footnote. :param index: the index count of current footnote. """ html = ( '<sup class="footnote-ref" id="fnref-%s">' '<a href="#fn-%s">%d</a></sup>' ) % (escape(key), escape(key), index) return html def footnote_item(self, key, text): """Rendering a footnote item. :param key: identity key for the footnote. :param text: text content of the footnote. """ back = ( '<a href="#fnref-%s" class="footnote">↩</a>' ) % escape(key) text = text.rstrip() if text.endswith('</p>'): text = re.sub(r'<\/p>$', r'%s</p>' % back, text) else: text = '%s<p>%s</p>' % (text, back) html = '<li id="fn-%s">%s</li>\n' % (escape(key), text) return html def footnotes(self, text): """Wrapper for all footnotes. :param text: contents of all footnotes. """ html = '<div class="footnotes">\n%s<ol>%s</ol>\n</div>\n' return html % (self.hrule(), text) class Markdown(object): """The Markdown parser. :param renderer: An instance of ``Renderer``. :param inline: An inline lexer class or instance. :param block: A block lexer class or instance. """ def __init__(self, renderer=None, inline=None, block=None, **kwargs): if not renderer: renderer = Renderer(**kwargs) else: kwargs.update(renderer.options) self.renderer = renderer if inline and inspect.isclass(inline): inline = inline(renderer, **kwargs) if block and inspect.isclass(block): block = block(**kwargs) if inline: self.inline = inline else: self.inline = InlineLexer(renderer, **kwargs) self.block = block or BlockLexer(BlockGrammar()) self.footnotes = [] self.tokens = [] # detect if it should parse text in block html self._parse_block_html = kwargs.get('parse_block_html') def __call__(self, text): return self.parse(text) def render(self, text): """Render the Markdown text. :param text: markdown formatted text content. """ return self.parse(text) def parse(self, text): out = self.output(preprocessing(text)) keys = self.block.def_footnotes # reset block self.block.def_links = {} self.block.def_footnotes = {} # reset inline self.inline.links = {} self.inline.footnotes = {} if not self.footnotes: return out footnotes = filter(lambda o: keys.get(o['key']), self.footnotes) self.footnotes = sorted( footnotes, key=lambda o: keys.get(o['key']), reverse=True ) body = self.renderer.placeholder() while self.footnotes: note = self.footnotes.pop() body += self.renderer.footnote_item( note['key'], note['text'] ) out += self.renderer.footnotes(body) return out def pop(self): if not self.tokens: return None self.token = self.tokens.pop() return self.token def peek(self): if self.tokens: return self.tokens[-1] return None # pragma: no cover def output(self, text, rules=None): self.tokens = self.block(text, rules) self.tokens.reverse() self.inline.setup(self.block.def_links, self.block.def_footnotes) out = self.renderer.placeholder() while self.pop(): out += self.tok() return out def tok(self): t = self.token['type'] # sepcial cases if t.endswith('_start'): t = t[:-6] return getattr(self, 'output_%s' % t)() def tok_text(self): text = self.token['text'] while self.peek()['type'] == 'text': text += '\n' + self.pop()['text'] return self.inline(text) def output_newline(self): return self.renderer.newline() def output_hrule(self): return self.renderer.hrule() def output_heading(self): return self.renderer.header( self.inline(self.token['text']), self.token['level'], self.token['text'], ) def output_code(self): return self.renderer.block_code( self.token['text'], self.token['lang'] ) def output_table(self): aligns = self.token['align'] aligns_length = len(aligns) cell = self.renderer.placeholder() # header part header = self.renderer.placeholder() for i, value in enumerate(self.token['header']): align = aligns[i] if i < aligns_length else None flags = {'header': True, 'align': align} cell += self.renderer.table_cell(self.inline(value), **flags) header += self.renderer.table_row(cell) # body part body = self.renderer.placeholder() for i, row in enumerate(self.token['cells']): cell = self.renderer.placeholder() for j, value in enumerate(row): align = aligns[j] if j < aligns_length else None flags = {'header': False, 'align': align} cell += self.renderer.table_cell(self.inline(value), **flags) body += self.renderer.table_row(cell) return self.renderer.table(header, body) def output_block_quote(self): body = self.renderer.placeholder() while self.pop()['type'] != 'block_quote_end': body += self.tok() return self.renderer.block_quote(body) def output_list(self): ordered = self.token['ordered'] body = self.renderer.placeholder() while self.pop()['type'] != 'list_end': body += self.tok() return self.renderer.list(body, ordered) def output_list_item(self): body = self.renderer.placeholder() while self.pop()['type'] != 'list_item_end': if self.token['type'] == 'text': body += self.tok_text() else: body += self.tok() return self.renderer.list_item(body) def output_loose_item(self): body = self.renderer.placeholder() while self.pop()['type'] != 'list_item_end': body += self.tok() return self.renderer.list_item(body) def output_footnote(self): self.inline._in_footnote = True body = self.renderer.placeholder() key = self.token['key'] while self.pop()['type'] != 'footnote_end': body += self.tok() self.footnotes.append({'key': key, 'text': body}) self.inline._in_footnote = False return self.renderer.placeholder() def output_close_html(self): text = self.token['text'] return self.renderer.block_html(text) def output_open_html(self): text = self.token['text'] tag = self.token['tag'] if self._parse_block_html and tag not in _pre_tags: text = self.inline(text, rules=self.inline.inline_html_rules) extra = self.token.get('extra') or '' html = '<%s%s>%s</%s>' % (tag, extra, text, tag) return self.renderer.block_html(html) def output_paragraph(self): return self.renderer.paragraph(self.inline(self.token['text'])) def output_text(self): return self.renderer.paragraph(self.tok_text()) def markdown(text, escape=True, **kwargs): """Render markdown formatted text to html. :param text: markdown formatted text content. :param escape: if set to False, all html tags will not be escaped. :param use_xhtml: output with xhtml tags. :param hard_wrap: if set to True, it will use the GFM line breaks feature. :param parse_block_html: parse text only in block level html. :param parse_inline_html: parse text only in inline level html. """ return Markdown(escape=escape, **kwargs)(text)