# coding: utf-8 """String filters. Contains a collection of useful string manipulation filters for use in Jinja templates. """ # Copyright (c) IPython Development Team. # Distributed under the terms of the Modified BSD License. import os import re import textwrap import warnings from urllib.parse import quote # defusedxml does safe(r) parsing of untrusted XML data from defusedxml import ElementTree from xml.etree.ElementTree import Element __all__ = [ 'wrap_text', 'html2text', 'add_anchor', 'strip_dollars', 'strip_files_prefix', 'comment_lines', 'get_lines', 'ipython2python', 'posix_path', 'path2url', 'add_prompts', 'ascii_only', 'prevent_list_blocks', 'strip_trailing_newline', ] def wrap_text(text, width=100): """ Intelligently wrap text. Wrap text without breaking words if possible. Parameters ---------- text : str Text to wrap. width : int, optional Number of characters to wrap to, default 100. """ split_text = text.split('\n') wrp = map(lambda x:textwrap.wrap(x,width), split_text) wrpd = map('\n'.join, wrp) return '\n'.join(wrpd) def html2text(element): """extract inner text from html Analog of jQuery's $(element).text() """ if isinstance(element, (str,)): try: element = ElementTree.fromstring(element) except Exception: # failed to parse, just return it unmodified return element text = element.text or "" for child in element: text += html2text(child) text += (element.tail or "") return text def _convert_header_id(header_contents): """Convert header contents to valid id value. Takes string as input, returns string. Note: this may be subject to change in the case of changes to how we wish to generate ids. For use on markdown headings. """ return header_contents.replace(' ', '-') def add_anchor(html, anchor_link_text=u'ΒΆ'): """Add an id and an anchor-link to an html header For use on markdown headings """ try: h = ElementTree.fromstring(html) except Exception: # failed to parse, just return it unmodified return html link = _convert_header_id(html2text(h)) h.set('id', link) a = Element("a", {"class": "anchor-link", "href": "#" + link}) try: # Test if the anchor link text is HTML (e.g. an image) a.append(ElementTree.fromstring(anchor_link_text)) except Exception: # If we fail to parse, assume we've just got regular text a.text = anchor_link_text h.append(a) return ElementTree.tostring(h).decode(encoding='utf-8') def add_prompts(code, first='>>> ', cont='... '): """Add prompts to code snippets""" new_code = [] code_list = code.split('\n') new_code.append(first + code_list[0]) for line in code_list[1:]: new_code.append(cont + line) return '\n'.join(new_code) def strip_dollars(text): """ Remove all dollar symbols from text Parameters ---------- text : str Text to remove dollars from """ return text.strip('$') files_url_pattern = re.compile(r'(src|href)\=([\'"]?)/?files/') markdown_url_pattern = re.compile(r'(!?)\[(?P.*?)\]\(/?files/(?P.*?)\)') def strip_files_prefix(text): """ Fix all fake URLs that start with ``files/``, stripping out the ``files/`` prefix. Applies to both urls (for html) and relative paths (for markdown paths). Parameters ---------- text : str Text in which to replace 'src="files/real...' with 'src="real...' """ cleaned_text = files_url_pattern.sub(r"\1=\2", text) cleaned_text = markdown_url_pattern.sub(r'\1[\2](\3)', cleaned_text) return cleaned_text def comment_lines(text, prefix='# '): """ Build a Python comment line from input text. Parameters ---------- text : str Text to comment out. prefix : str Character to append to the start of each line. """ #Replace line breaks with line breaks and comment symbols. #Also add a comment symbol at the beginning to comment out #the first line. return prefix + ('\n'+prefix).join(text.split('\n')) def get_lines(text, start=None,end=None): """ Split the input text into separate lines and then return the lines that the caller is interested in. Parameters ---------- text : str Text to parse lines from. start : int, optional First line to grab from. end : int, optional Last line to grab from. """ # Split the input into lines. lines = text.split("\n") # Return the right lines. return "\n".join(lines[start:end]) #re-join def ipython2python(code): """Transform IPython syntax to pure Python syntax Parameters ---------- code : str IPython code, to be transformed to pure Python """ try: from IPython.core.inputsplitter import IPythonInputSplitter except ImportError: warnings.warn( "IPython is needed to transform IPython syntax to pure Python." " Install ipython if you need this functionality." ) return code else: isp = IPythonInputSplitter(line_input_checker=False) return isp.transform_cell(code) def posix_path(path): """Turn a path into posix-style path/to/etc Mainly for use in latex on Windows, where native Windows paths are not allowed. """ if os.path.sep != '/': return path.replace(os.path.sep, '/') return path def path2url(path): """Turn a file path into a URL""" parts = path.split(os.path.sep) return '/'.join(quote(part) for part in parts) def ascii_only(s): """ensure a string is ascii""" return s.encode('ascii', 'replace').decode('ascii') def prevent_list_blocks(s): """ Prevent presence of enumerate or itemize blocks in latex headings cells """ out = re.sub(r'(^\s*\d*)\.', r'\1\.', s) out = re.sub(r'(^\s*)\-', r'\1\-', out) out = re.sub(r'(^\s*)\+', r'\1\+', out) out = re.sub(r'(^\s*)\*', r'\1\*', out) return out def strip_trailing_newline(text): """ Strips a newline from the end of text. """ if text.endswith('\n'): text = text[:-1] return text