From 40d157895c4cdc5178ab2b923e9d2a80cb117e34 Mon Sep 17 00:00:00 2001
From: Zack Weinberg
Date: Wed, 26 Nov 2014 13:42:37 -0500
Subject: [PATCH] Fixes and enhancements including:

* Add PANDOC_EXTENSIONS configuration variable, allowing one to enable or disable Pandoc's markdown extensions individually.
* Remove dependency on pypandoc.
* Don't change the working directory.
* More efficient metadata extraction.
---
 README.md        |  9 +++++++--
 pandoc_reader.py | 51 ++++++++++++++++++++++++++----------------------
 2 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/README.md b/README.md
index 25c4286..08055fc 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,6 @@ A pandoc [markdown] reader plugin for [pelican]
 Requirements
 ------------
 
-  - [pypandoc]
   - [pandoc] in $PATH
 
 
@@ -30,6 +29,13 @@ Additional command line parameters can be passed to pandoc via the PANDOC_ARGS p
       '--number-sections',
     ]
 
+Pandoc's markdown extensions can be enabled or disabled via the
+PANDOC_EXTENSIONS parameter.
+
+    PANDOC_EXTENSIONS = [
+      '+hard_line_breaks',
+      '-citations'
+    ]
 
 Contributing
 ------------
@@ -44,4 +50,3 @@ Contributing
 [markdown]: http://daringfireball.net/projects/markdown/
 [pandoc]: http://johnmacfarlane.net/pandoc/
 [pelican]: http://getpelican.com
-[pypandoc]: https://github.com/bebraw/pypandoc
diff --git a/pandoc_reader.py b/pandoc_reader.py
index cade6b7..b4bd762 100644
--- a/pandoc_reader.py
+++ b/pandoc_reader.py
@@ -1,43 +1,48 @@
-import os
+import subprocess
+
 from pelican import signals
 from pelican.readers import BaseReader
 from pelican.utils import pelican_open
 
-import pypandoc
-
 class PandocReader(BaseReader):
     enabled = True
     file_extensions = ['md', 'markdown', 'mkd', 'mdown']
 
     def read(self, filename):
-        with pelican_open(filename) as text:
-            metadata_items = []
-            in_content = False
-            MD = ''
-            for line in text.splitlines():
-                splitted = line.split(':', 1)
-                if len(splitted) == 2 and not in_content:
-                    metadata_items.append(splitted)
-                else:
-                    in_content = True
-                    MD += line + '\n'
+        with pelican_open(filename) as fp:
+            text = list(fp.splitlines())
 
-            metadata = {}
-            for name, value in metadata_items:
-                name = name.lower()
-                value = value.strip()
+        metadata = {}
+        for i, line in enumerate(text):
+            kv = line.split(':', 1)
+            if len(kv) == 2:
+                name, value = kv[0].lower(), kv[1].strip()
                 metadata[name] = self.process_metadata(name, value)
+            else:
+                content = "\n".join(text[i:])
+                break
 
-        os.chdir(self.settings['PATH']) # change the cwd to the content dir
-        if not 'PANDOC_ARGS' in self.settings: self.settings['PANDOC_ARGS'] = []
-        output = pypandoc.convert(MD, 'html5', format='md', extra_args=self.settings['PANDOC_ARGS'])
+        extra_args = self.settings.get('PANDOC_ARGS', [])
+        extensions = self.settings.get('PANDOC_EXTENSIONS', '')
+        if isinstance(extensions, list):
+            extensions = ''.join(extensions)
+
+        pandoc_cmd = ["pandoc", "--from=markdown" + extensions, "--to=html5"]
+        pandoc_cmd.extend(extra_args)
+
+        proc = subprocess.Popen(pandoc_cmd,
+                                stdin = subprocess.PIPE,
+                                stdout = subprocess.PIPE)
+
+        output = proc.communicate(content.encode('utf-8'))[0].decode('utf-8')
+        status = proc.wait()
+        if status:
+            raise subprocess.CalledProcessError(status, pandoc_cmd)
 
         return output, metadata
 
-
 def add_reader(readers):
     readers.reader_classes['md'] = PandocReader
 
-
 def register():
     signals.readers_init.connect(add_reader)