579 lines
22 KiB

#!/usr/bin/env python
"""NbConvert is a utility for conversion of .ipynb files.
Command-line interface for the NbConvert conversion utility.
"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
from __future__ import print_function
import logging
import sys
import os
import glob
import asyncio
from textwrap import fill, dedent
from ipython_genutils.text import indent
from jupyter_core.application import JupyterApp, base_aliases, base_flags
from traitlets.config import catch_config_error, Configurable
from traitlets import (
Unicode, List, Instance, DottedObjectName, Type, Bool,
default, observe,
)
from traitlets.utils.importstring import import_item
from .exporters.base import get_export_names, get_exporter
from nbconvert import exporters, preprocessors, writers, postprocessors, __version__
from .utils.base import NbConvertBase
from .utils.exceptions import ConversionException
from .utils.io import unicode_stdin_stream
#-----------------------------------------------------------------------------
#Classes and functions
#-----------------------------------------------------------------------------
class DottedOrNone(DottedObjectName):
"""A string holding a valid dotted object name in Python, such as A.b3._c
Also allows for None type.
"""
default_value = u''
def validate(self, obj, value):
if value is not None and len(value) > 0:
return super().validate(obj, value)
else:
return value
nbconvert_aliases = {}
nbconvert_aliases.update(base_aliases)
nbconvert_aliases.update({
'to' : 'NbConvertApp.export_format',
'template' : 'TemplateExporter.template_name',
'template-file' : 'TemplateExporter.template_file',
'writer' : 'NbConvertApp.writer_class',
'post': 'NbConvertApp.postprocessor_class',
'output': 'NbConvertApp.output_base',
'output-dir': 'FilesWriter.build_directory',
'reveal-prefix': 'SlidesExporter.reveal_url_prefix',
'nbformat': 'NotebookExporter.nbformat_version',
})
nbconvert_flags = {}
nbconvert_flags.update(base_flags)
nbconvert_flags.update({
'execute' : (
{'ExecutePreprocessor' : {'enabled' : True}},
"Execute the notebook prior to export."
),
'allow-errors' : (
{'ExecutePreprocessor' : {'allow_errors' : True}},
("Continue notebook execution even if one of the cells throws "
"an error and include the error message in the cell output "
"(the default behaviour is to abort conversion). This flag "
"is only relevant if '--execute' was specified, too.")
),
'stdin' : (
{'NbConvertApp' : {
'from_stdin' : True,
}
},
"read a single notebook file from stdin. Write the resulting notebook with default basename 'notebook.*'"
),
'stdout' : (
{'NbConvertApp' : {'writer_class' : "StdoutWriter"}},
"Write notebook output to stdout instead of files."
),
'inplace' : (
{
'NbConvertApp' : {
'use_output_suffix' : False,
'export_format' : 'notebook',
},
'FilesWriter' : {'build_directory': ''},
},
"""Run nbconvert in place, overwriting the existing notebook (only
relevant when converting to notebook format)"""
),
'clear-output' : (
{
'NbConvertApp' : {
'use_output_suffix' : False,
'export_format' : 'notebook',
},
'FilesWriter' : {'build_directory': ''},
'ClearOutputPreprocessor' : {'enabled' : True},
},
"""Clear output of current file and save in place,
overwriting the existing notebook. """
),
'no-prompt' : (
{'TemplateExporter' : {
'exclude_input_prompt' : True,
'exclude_output_prompt' : True,
}
},
"Exclude input and output prompts from converted document."
),
'no-input' : (
{'TemplateExporter' : {
'exclude_output_prompt' : True,
'exclude_input': True,
}
},
"""Exclude input cells and output prompts from converted document.
This mode is ideal for generating code-free reports."""
),
'allow-chromium-download' : (
{'WebPDFExporter' : {
'allow_chromium_download' : True,
}
},
"""Whether to allow downloading chromium if no suitable version is found on the system."""
),
})
class NbConvertApp(JupyterApp):
"""Application used to convert from notebook file type (``*.ipynb``)"""
version = __version__
name = 'jupyter-nbconvert'
aliases = nbconvert_aliases
flags = nbconvert_flags
@default('log_level')
def _log_level_default(self):
return logging.INFO
classes = List()
@default('classes')
def _classes_default(self):
classes = [NbConvertBase]
for pkg in (exporters, preprocessors, writers, postprocessors):
for name in dir(pkg):
cls = getattr(pkg, name)
if isinstance(cls, type) and issubclass(cls, Configurable):
classes.append(cls)
return classes
description = Unicode(
u"""This application is used to convert notebook files (*.ipynb)
to various other formats.
WARNING: THE COMMANDLINE INTERFACE MAY CHANGE IN FUTURE RELEASES.""")
output_base = Unicode('', help='''overwrite base name use for output files.
can only be used when converting one notebook at a time.
''').tag(config=True)
use_output_suffix = Bool(
True,
help="""Whether to apply a suffix prior to the extension (only relevant
when converting to notebook format). The suffix is determined by
the exporter, and is usually '.nbconvert'."""
).tag(config=True)
output_files_dir = Unicode('{notebook_name}_files',
help='''Directory to copy extra files (figures) to.
'{notebook_name}' in the string will be converted to notebook
basename.'''
).tag(config=True)
examples = Unicode(u"""
The simplest way to use nbconvert is
> jupyter nbconvert mynotebook.ipynb --to html
Options include {formats}.
> jupyter nbconvert --to latex mynotebook.ipynb
Both HTML and LaTeX support multiple output templates. LaTeX includes
'base', 'article' and 'report'. HTML includes 'basic' and 'full'. You
can specify the flavor of the format used.
> jupyter nbconvert --to html --template lab mynotebook.ipynb
You can also pipe the output to stdout, rather than a file
> jupyter nbconvert mynotebook.ipynb --stdout
PDF is generated via latex
> jupyter nbconvert mynotebook.ipynb --to pdf
You can get (and serve) a Reveal.js-powered slideshow
> jupyter nbconvert myslides.ipynb --to slides --post serve
Multiple notebooks can be given at the command line in a couple of
different ways:
> jupyter nbconvert notebook*.ipynb
> jupyter nbconvert notebook1.ipynb notebook2.ipynb
or you can specify the notebooks list in a config file, containing::
c.NbConvertApp.notebooks = ["my_notebook.ipynb"]
> jupyter nbconvert --config mycfg.py
""".format(formats=get_export_names()))
# Writer specific variables
writer = Instance('nbconvert.writers.base.WriterBase',
help="""Instance of the writer class used to write the
results of the conversion.""", allow_none=True)
writer_class = DottedObjectName('FilesWriter',
help="""Writer class used to write the
results of the conversion""").tag(config=True)
writer_aliases = {'fileswriter': 'nbconvert.writers.files.FilesWriter',
'debugwriter': 'nbconvert.writers.debug.DebugWriter',
'stdoutwriter': 'nbconvert.writers.stdout.StdoutWriter'}
writer_factory = Type(allow_none=True)
@observe('writer_class')
def _writer_class_changed(self, change):
new = change['new']
if new.lower() in self.writer_aliases:
new = self.writer_aliases[new.lower()]
self.writer_factory = import_item(new)
# Post-processor specific variables
postprocessor = Instance('nbconvert.postprocessors.base.PostProcessorBase',
help="""Instance of the PostProcessor class used to write the
results of the conversion.""", allow_none=True)
postprocessor_class = DottedOrNone(
help="""PostProcessor class used to write the
results of the conversion"""
).tag(config=True)
postprocessor_aliases = {'serve': 'nbconvert.postprocessors.serve.ServePostProcessor'}
postprocessor_factory = Type(None, allow_none=True)
@observe('postprocessor_class')
def _postprocessor_class_changed(self, change):
new = change['new']
if new.lower() in self.postprocessor_aliases:
new = self.postprocessor_aliases[new.lower()]
if new:
self.postprocessor_factory = import_item(new)
jupyter_widgets_base_url = Unicode("https://unpkg.com/",
help="URL base for Jupyter widgets").tag(config=True)
html_manager_semver_range = Unicode('*',
help="Semver range for Jupyter widgets HTML manager").tag(config=True)
export_format = Unicode(
allow_none=False,
help="""The export format to be used, either one of the built-in formats
{formats}
or a dotted object name that represents the import path for an
`Exporter` class""".format(formats=get_export_names())
).tag(config=True)
notebooks = List([], help="""List of notebooks to convert.
Wildcards are supported.
Filenames passed positionally will be added to the list.
"""
).tag(config=True)
from_stdin = Bool(False, help="read a single notebook from stdin.").tag(config=True)
@catch_config_error
def initialize(self, argv=None):
"""Initialize application, notebooks, writer, and postprocessor"""
# See https://bugs.python.org/issue37373 :(
if sys.version_info[0] == 3 and sys.version_info[1] >= 8 and sys.platform.startswith('win'):
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
self.init_syspath()
super().initialize(argv)
self.init_notebooks()
self.init_writer()
self.init_postprocessor()
def init_syspath(self):
"""Add the cwd to the sys.path ($PYTHONPATH)"""
sys.path.insert(0, os.getcwd())
def init_notebooks(self):
"""Construct the list of notebooks.
If notebooks are passed on the command-line,
they override (rather than add) notebooks specified in config files.
Glob each notebook to replace notebook patterns with filenames.
"""
# Specifying notebooks on the command-line overrides (rather than
# adds) the notebook list
if self.extra_args:
patterns = self.extra_args
else:
patterns = self.notebooks
# Use glob to replace all the notebook patterns with filenames.
filenames = []
for pattern in patterns:
# Use glob to find matching filenames. Allow the user to convert
# notebooks without having to type the extension.
globbed_files = glob.glob(pattern)
globbed_files.extend(glob.glob(pattern + '.ipynb'))
if not globbed_files:
self.log.warning("pattern %r matched no files", pattern)
for filename in globbed_files:
if not filename in filenames:
filenames.append(filename)
self.notebooks = filenames
def init_writer(self):
"""Initialize the writer (which is stateless)"""
self._writer_class_changed({ 'new': self.writer_class })
self.writer = self.writer_factory(parent=self)
if hasattr(self.writer, 'build_directory') and self.writer.build_directory != '':
self.use_output_suffix = False
def init_postprocessor(self):
"""Initialize the postprocessor (which is stateless)"""
self._postprocessor_class_changed({'new': self.postprocessor_class})
if self.postprocessor_factory:
self.postprocessor = self.postprocessor_factory(parent=self)
def start(self):
"""Run start after initialization process has completed"""
super().start()
self.convert_notebooks()
def init_single_notebook_resources(self, notebook_filename):
"""Step 1: Initialize resources
This initializes the resources dictionary for a single notebook.
Returns
-------
dict
resources dictionary for a single notebook that MUST include the following keys:
- config_dir: the location of the Jupyter config directory
- unique_key: the notebook name
- output_files_dir: a directory where output files (not
including the notebook itself) should be saved
"""
basename = os.path.basename(notebook_filename)
notebook_name = basename[:basename.rfind('.')]
if self.output_base:
# strip duplicate extension from output_base, to avoid Basename.ext.ext
if getattr(self.exporter, 'file_extension', False):
base, ext = os.path.splitext(self.output_base)
if ext == self.exporter.file_extension:
self.output_base = base
notebook_name = self.output_base
self.log.debug("Notebook name is '%s'", notebook_name)
# first initialize the resources we want to use
resources = {}
resources['config_dir'] = self.config_dir
resources['unique_key'] = notebook_name
output_files_dir = (self.output_files_dir
.format(notebook_name=notebook_name))
resources['output_files_dir'] = output_files_dir
resources['jupyter_widgets_base_url'] = self.jupyter_widgets_base_url
resources['html_manager_semver_range'] = self.html_manager_semver_range
return resources
def export_single_notebook(self, notebook_filename, resources, input_buffer=None):
"""Step 2: Export the notebook
Exports the notebook to a particular format according to the specified
exporter. This function returns the output and (possibly modified)
resources from the exporter.
Parameters
----------
notebook_filename : str
name of notebook file.
resources : dict
input_buffer :
readable file-like object returning unicode.
if not None, notebook_filename is ignored
Returns
-------
output
dict
resources (possibly modified)
"""
try:
if input_buffer is not None:
output, resources = self.exporter.from_file(input_buffer, resources=resources)
else:
output, resources = self.exporter.from_filename(notebook_filename, resources=resources)
except ConversionException:
self.log.error("Error while converting '%s'", notebook_filename, exc_info=True)
self.exit(1)
return output, resources
def write_single_notebook(self, output, resources):
"""Step 3: Write the notebook to file
This writes output from the exporter to file using the specified writer.
It returns the results from the writer.
Parameters
----------
output :
resources : dict
resources for a single notebook including name, config directory
and directory to save output
Returns
-------
file
results from the specified writer output of exporter
"""
if 'unique_key' not in resources:
raise KeyError("unique_key MUST be specified in the resources, but it is not")
notebook_name = resources['unique_key']
if self.use_output_suffix and not self.output_base:
notebook_name += resources.get('output_suffix', '')
write_results = self.writer.write(
output, resources, notebook_name=notebook_name)
return write_results
def postprocess_single_notebook(self, write_results):
"""Step 4: Post-process the written file
Only used if a postprocessor has been specified. After the
converted notebook is written to a file in Step 3, this post-processes
the notebook.
"""
# Post-process if post processor has been defined.
if hasattr(self, 'postprocessor') and self.postprocessor:
self.postprocessor(write_results)
def convert_single_notebook(self, notebook_filename, input_buffer=None):
"""Convert a single notebook.
Performs the following steps:
1. Initialize notebook resources
2. Export the notebook to a particular format
3. Write the exported notebook to file
4. (Maybe) postprocess the written file
Parameters
----------
notebook_filename : str
input_buffer :
If input_buffer is not None, conversion is done and the buffer is
used as source into a file basenamed by the notebook_filename
argument.
"""
if input_buffer is None:
self.log.info("Converting notebook %s to %s", notebook_filename, self.export_format)
else:
self.log.info("Converting notebook into %s", self.export_format)
resources = self.init_single_notebook_resources(notebook_filename)
output, resources = self.export_single_notebook(notebook_filename, resources, input_buffer=input_buffer)
write_results = self.write_single_notebook(output, resources)
self.postprocess_single_notebook(write_results)
def convert_notebooks(self):
"""Convert the notebooks in the self.notebook traitlet """
# check that the output base isn't specified if there is more than
# one notebook to convert
if self.output_base != '' and len(self.notebooks) > 1:
self.log.error(
"""
UsageError: --output flag or `NbConvertApp.output_base` config option
cannot be used when converting multiple notebooks.
"""
)
self.exit(1)
# no notebooks to convert!
if len(self.notebooks) == 0 and not self.from_stdin:
self.print_help()
sys.exit(-1)
if not self.export_format:
raise ValueError(
"Please specify an output format with '--to <format>'."
f"\nThe following formats are available: {get_export_names()}"
)
# initialize the exporter
cls = get_exporter(self.export_format)
self.exporter = cls(config=self.config)
# convert each notebook
if not self.from_stdin:
for notebook_filename in self.notebooks:
self.convert_single_notebook(notebook_filename)
else:
input_buffer = unicode_stdin_stream()
# default name when conversion from stdin
self.convert_single_notebook("notebook.ipynb", input_buffer=input_buffer)
def document_flag_help(self):
"""
Return a string containing descriptions of all the flags.
"""
flags = "The following flags are defined:\n\n"
for flag, (cfg, fhelp) in self.flags.items():
flags += "{}\n".format(flag)
flags += indent(fill(fhelp, 80)) + '\n\n'
flags += indent(fill("Long Form: "+str(cfg), 80)) + '\n\n'
return flags
def document_alias_help(self):
"""Return a string containing all of the aliases"""
aliases = "The folowing aliases are defined:\n\n"
for alias, longname in self.aliases.items():
aliases += "\t**{}** ({})\n\n".format(alias, longname)
return aliases
def document_config_options(self):
"""
Provides a much improves version of the configuration documentation by
breaking the configuration options into app, exporter, writer,
preprocessor, postprocessor, and other sections.
"""
categories = {category: [c for c in self._classes_inc_parents() if category in c.__name__.lower()]
for category in ['app', 'exporter', 'writer', 'preprocessor', 'postprocessor']}
accounted_for = {c for category in categories.values() for c in category}
categories['other']= [c for c in self._classes_inc_parents() if c not in accounted_for]
header = dedent("""
{section} Options
-----------------------
""")
sections = ""
for category in categories:
sections += header.format(section=category.title())
if category in ['exporter','preprocessor','writer']:
sections += ".. image:: _static/{image}_inheritance.png\n\n".format(image=category)
sections += '\n'.join(c.class_config_rst_doc() for c in categories[category])
return sections.replace(' : ',r' \: ')
#-----------------------------------------------------------------------------
# Main entry point
#-----------------------------------------------------------------------------
main = launch_new_instance = NbConvertApp.launch_instance