pelidoc/pelidoc.py

172 lines
5.6 KiB
Python

# -*- coding: utf-8 -*-
"""A Pelican plugin to use Pandoc software.
Pandoc is a powerful tool to transform text from different formats (e.g. ReST,
Markdown) to others. It is really helpful to generate PDF or EPUB.
This plugin provides a powerful interface to handle document generation through
Pandoc.
Here is the list of configuration variables.
PANDOC_OUTPUTS a dict where the keys are the output formats and the
values are the destination directories. Note that if
to_format is set to "pdf", the "to"-argument is set to
"latex" for Pandoc, which does not support a "-t pdf"
argument.
PANDOC_EXPORT_ARTICLES True (default) if you want to export articles, False
otherwise.
PANDOC_EXPORT_PAGES True (default) if you want to export pages, False otherwise.
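PANDOC_MARKDOWN_EXTENSIONS a list of Pandoc markdown extensions; the items are
appended as-is to the "markdown" input format
PANDOC_EXECUTABLE path to the pandoc executable (default: "pandoc")
PANDOC_EXTRA_OPTIONS a list of extra options passed to the pandoc executable
PDF_PROCESSOR must be set to True, otherwise no file is generated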
Important note: this plugin requires Pandoc to be installed on your system to work!
"""
from __future__ import unicode_literals
import os
import logging
from pelican import signals
from pelican.generators import Generator
from subprocess import check_call
logger = logging.getLogger(__name__)
class PandocGenerator(Generator):
    """The Pandoc generator.

    It takes the list of articles / pages and generates files according to
    the PANDOC_OUTPUTS configuration variable.
    """

    # Maps a lower-cased source file extension to the short format name
    # returned by guess_format().
    _FORMATS_BY_EXTENSION = {
        '.rst': 'rst',
        '.md': 'md',
        '.markdown': 'md',
        '.mkd': 'md',
        '.mdown': 'md',
    }

    def guess_format(self, content):
        """Return the format used by a given content.

        The result is based on the extension of ``content.source_path``
        (compared case-insensitively) and is either "rst" or "md".

        TODO: it could be great if Pelican could support this method itself.

        :param content: the content (article or page)
        :type content: pelican.contents.Content
        :return: the article type (rst or md)
        :rtype: str
        :raises: KeyError if the file extension is not supported
        """
        _, file_extension = os.path.splitext(content.source_path)
        return self._FORMATS_BY_EXTENSION[file_extension.lower()]

    def check_output_dir(self, dir_):
        """Check and create if needed the given directory.

        Missing parent directories are created as well, so a nested
        destination such as "exports/pdf" works.

        :param dir_: path of the directory that must exist
        :return: True if the directory exists when the method returns,
                 False if it could not be created
        :rtype: bool
        """
        if os.path.isdir(dir_):
            return True
        try:
            os.makedirs(dir_)
        except OSError:
            # Creation failed, but the directory may have appeared in the
            # meantime (e.g. created concurrently): only report a failure
            # when it really is not there.
            return os.path.isdir(dir_)
        return True

    def generate_files(self, content):
        """Generates the list of files for a given content.

        Nothing is generated unless the PDF_PROCESSOR setting is truthy.
        One Pandoc invocation is made per entry of PANDOC_OUTPUTS; the
        output file is named "<slug>.<format>" inside the configured
        directory (relative to ``self.output_path``).

        :param content: the content to generate.
        :type content: pelican.contents.Content
        :raises: whatever ``subprocess.check_call`` raises when Pandoc
                 cannot be started or exits with a non-zero status
        """
        if not self.settings.get('PDF_PROCESSOR', False):
            return

        try:
            from_format = self.guess_format(content)
        except KeyError:
            logger.error("Unsupported format for %s", content.source_path)
            return

        # Use the same format as Pelican (particularly for metadata!).
        # This does not depend on the output format, so compute it once.
        if from_format == 'md':
            from_format = 'markdown' + ''.join(
                self.settings.get('PANDOC_MARKDOWN_EXTENSIONS', []))

        executable = self.settings.get('PANDOC_EXECUTABLE', 'pandoc')
        # list() so that a tuple in the settings can be concatenated too.
        extra_options = list(self.settings.get('PANDOC_EXTRA_OPTIONS', []))

        list_outputs = self.settings.get('PANDOC_OUTPUTS', {})
        for to_format, output_dir in list_outputs.items():
            output_dir = os.path.join(self.output_path, output_dir)
            if not self.check_output_dir(output_dir):
                logger.error("Couldn't create the %s output folder in %s",
                             to_format, output_dir)
                continue

            filename = "{id_file}.{extension}".format(
                id_file=content.slug,
                extension=to_format
            )
            filepath = os.path.join(output_dir, filename)

            # Pandoc doesn't take "pdf" as an output value. Use latex
            # instead; the file name above keeps the ".pdf" extension.
            pandoc_to_format = 'latex' if to_format == 'pdf' else to_format

            # Here is the magic!
            check_call(
                [executable] +
                extra_options +
                ['-f', from_format, '-t', pandoc_to_format] +
                ['--standalone', '-o', filepath, content.source_path]
            )
            logger.info("[ok] writing %s", filepath)

    def generate_output(self, writer=None):
        """Generate files for each articles and pages.

        If PANDOC_EXPORT_ARTICLES is False, articles are not generated.
        If PANDOC_EXPORT_PAGES is False, pages are not generated.
        A content list missing from the context is treated as empty.

        We don't use the writer passed as argument since we write our own
        files ((c) PDF plugin :)).

        :param writer: unused
        """
        contents_to_export = []
        if self.settings.get('PANDOC_EXPORT_ARTICLES', True):
            contents_to_export.extend(self.context.get('articles', []))
        if self.settings.get('PANDOC_EXPORT_PAGES', True):
            contents_to_export.extend(self.context.get('pages', []))

        for content_to_export in contents_to_export:
            self.generate_files(content_to_export)
def get_generators(generators):
    """Return the generator class provided by this plugin.

    :param generators: argument supplied by the caller; it is not used.
    :return: the PandocGenerator class (the class itself, not an instance)
    """
    pandoc_generator_class = PandocGenerator
    return pandoc_generator_class
def register():
    """Connect get_generators to Pelican's ``get_generators`` signal."""
    generators_signal = signals.get_generators
    generators_signal.connect(get_generators)