Fixes and enhancements including:

 * Add PANDOC_EXTENSIONS configuration variable, allowing one to
   enable or disable Pandoc's markdown extensions individually.
 * Remove dependency on pypandoc.
 * Don't change the working directory.
 * More efficient metadata extraction.
This commit is contained in:
Zack Weinberg 2014-11-26 13:42:37 -05:00
parent 1dceb54a01
commit 40d157895c
2 changed files with 35 additions and 25 deletions

View File

@ -7,7 +7,6 @@ A pandoc [markdown] reader plugin for [pelican]
Requirements
------------
- [pypandoc]
- [pandoc] in $PATH
@ -30,6 +29,13 @@ Additional command line parameters can be passed to pandoc via the PANDOC_ARGS p
'--number-sections',
]
Pandoc's markdown extensions can be enabled or disabled via the
PANDOC_EXTENSIONS parameter.
PANDOC_EXTENSIONS = [
'+hard_line_breaks',
'-citations'
]
Contributing
------------
@ -44,4 +50,3 @@ Contributing
[markdown]: http://daringfireball.net/projects/markdown/
[pandoc]: http://johnmacfarlane.net/pandoc/
[pelican]: http://getpelican.com
[pypandoc]: https://github.com/bebraw/pypandoc

View File

@ -1,43 +1,48 @@
import os
import subprocess
from pelican import signals
from pelican.readers import BaseReader
from pelican.utils import pelican_open
import pypandoc
class PandocReader(BaseReader):
    """Reader that converts Markdown sources to HTML5 by running ``pandoc``.

    The ``pandoc`` executable is invoked as a subprocess and must therefore
    be reachable through ``$PATH``.  Two settings are consulted:

    * ``PANDOC_ARGS`` -- list of extra command line arguments for pandoc.
    * ``PANDOC_EXTENSIONS`` -- pandoc markdown extension toggles such as
      ``'+hard_line_breaks'`` or ``'-citations'``, given either as a
      list/tuple of toggles or as one pre-joined string.
    """

    enabled = True
    file_extensions = ['md', 'markdown', 'mkd', 'mdown']

    def read(self, filename):
        """Parse *filename* and return an ``(html, metadata)`` tuple.

        Leading lines of the form ``key: value`` are treated as metadata.
        The first line that contains no colon ends the metadata header;
        that line and everything after it is the Markdown body handed to
        pandoc on its standard input.

        Raises:
            subprocess.CalledProcessError: if pandoc exits with a non-zero
                status.
        """
        with pelican_open(filename) as fp:
            text = fp.splitlines()

        metadata = {}
        # Default for files that are empty or consist solely of metadata
        # lines: without it ``content`` would be unbound further down.
        content = ''
        for i, line in enumerate(text):
            kv = line.split(':', 1)
            if len(kv) != 2:
                # First non-metadata line: the rest of the file is the body.
                content = "\n".join(text[i:])
                break
            name, value = kv[0].lower(), kv[1].strip()
            metadata[name] = self.process_metadata(name, value)

        extra_args = self.settings.get('PANDOC_ARGS', [])
        extensions = self.settings.get('PANDOC_EXTENSIONS', '')
        if isinstance(extensions, (list, tuple)):
            # Toggles already carry their '+'/'-' prefix, so plain
            # concatenation yields e.g. "markdown+hard_line_breaks-citations".
            extensions = ''.join(extensions)

        pandoc_cmd = ["pandoc", "--from=markdown" + extensions, "--to=html5"]
        pandoc_cmd.extend(extra_args)

        proc = subprocess.Popen(pandoc_cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
        output = proc.communicate(content.encode('utf-8'))[0].decode('utf-8')
        # communicate() has already waited for the process to terminate.
        status = proc.returncode
        if status:
            raise subprocess.CalledProcessError(status, pandoc_cmd)

        return output, metadata
def add_reader(readers):
    """Register ``PandocReader`` for every file extension it declares.

    ``readers`` is the object delivered with the ``readers_init`` signal;
    its ``reader_classes`` mapping associates a file extension with the
    reader class used for it.  All entries of
    ``PandocReader.file_extensions`` are registered (not just ``'md'``) so
    that the mapping agrees with what the class itself advertises.
    """
    for extension in PandocReader.file_extensions:
        readers.reader_classes[extension] = PandocReader
def register():
    """Connect ``add_reader`` to the ``readers_init`` signal."""
    readers_init = signals.readers_init
    readers_init.connect(add_reader)