pelican-pandoc-reader/pandoc_reader.py

116 lines
3.7 KiB
Python
Raw Normal View History

import logging
import subprocess
2015-05-16 18:13:47 +00:00
2014-03-26 10:35:27 +00:00
from pelican import signals
from pelican.readers import BaseReader
from pelican.utils import pelican_open
import os
2014-08-27 07:41:47 +00:00
2015-05-16 18:13:47 +00:00
# PyYAML is an optional dependency: when it is missing, ``yaml`` is set to
# None so the rest of the module can test for it before parsing YAML
# metadata blocks.
try:
    import yaml
except ImportError:
    yaml = None
    # Use a module-named logger rather than the root logger so that emitting
    # this warning at import time never triggers logging.basicConfig().
    logging.getLogger(__name__).warning(
        "YAML is not installed; the YAML reader will not work.")
2015-05-16 18:13:47 +00:00
2014-08-27 07:41:47 +00:00
class PandocReader(BaseReader):
    """Reader that converts Markdown sources to HTML5 by piping them to pandoc.

    Metadata comes either from a leading YAML block (first line is ``---``
    and PyYAML is importable) or from leading ``key: value`` lines.

    Settings read from ``self.settings``:

    * ``PANDOC_ARGS`` -- extra command-line arguments for pandoc.
    * ``PANDOC_EXTENSIONS`` -- string (or list of strings) appended to
      ``--from=markdown``.
    * ``PANDOC_FILTERS`` -- list of filters, each passed via ``--filter``.
    * ``PANDOC_BIBDIR`` -- directory joined with the ``bibliography``
      metadata value.
    * ``PANDOC_BIBHEADER`` -- optional title for the reference section.
    """

    enabled = True
    file_extensions = ['md', 'markdown', 'mkd', 'mdown']

    def _get_meta_and_content(self, text):
        """Split the source lines into metadata and the text sent to pandoc.

        :param text: the document as a list of lines without line endings.
        :returns: a ``(metadata, content)`` tuple; ``metadata`` maps
            lower-cased names to values run through ``process_metadata``,
            ``content`` is a single newline-joined string.
        """
        metadata = {}
        # Default for inputs that never reach a content line (an empty file,
        # or a file made only of ``key: value`` lines).
        content = ""

        # ``bool(text)`` guards the ``text[0]`` lookup against empty files.
        use_yaml = bool(text) and text[0] == '---' and yaml is not None
        if use_yaml:
            # Collect the lines between the opening ``---`` and the
            # terminator (``---`` or ``...``); the terminator is excluded.
            to_parse = []
            for line in text[1:]:
                if line in ('---', '...'):
                    break
                to_parse.append(line)

            # safe_load: the document is external input, and a bare
            # ``yaml.load(text)`` without a Loader is rejected by PyYAML >= 6.
            parsed = yaml.safe_load("\n".join(to_parse))

            # An empty block parses to None and a scalar/list block is not a
            # mapping; both are treated as "no metadata".
            if isinstance(parsed, dict):
                # Postprocess to make the data usable by Pelican.
                for key, value in parsed.items():
                    name = str(key).lower()
                    metadata[name] = self.process_metadata(name, value)

            # The whole text, YAML block included, is handed to pandoc.
            content = "\n".join(text)
        else:
            for i, line in enumerate(text):
                kv = line.split(':', 1)
                if len(kv) == 2:
                    name, value = kv[0].lower(), kv[1].strip()
                    metadata[name] = self.process_metadata(name, value)
                else:
                    # First line without a colon: everything from here on
                    # is document content.
                    content = "\n".join(text[i:])
                    break
        return metadata, content

    def read(self, filename):
        """Read ``filename``, convert it with pandoc and return the result.

        :param filename: path of the Markdown source file.
        :returns: an ``(output, metadata)`` tuple where ``output`` is the
            HTML produced by pandoc and ``metadata`` is the parsed metadata.
        """
        with pelican_open(filename) as fp:
            text = list(fp.splitlines())

        metadata, content = self._get_meta_and_content(text)

        bib_dir = self.settings.get('PANDOC_BIBDIR', '')
        bib_header = self.settings.get('PANDOC_BIBHEADER', None)
        filters = self.settings.get('PANDOC_FILTERS', [])
        extensions = self.settings.get('PANDOC_EXTENSIONS', '')
        if isinstance(extensions, list):
            extensions = ''.join(extensions)
        # Copy so the list stored in the settings is never mutated.
        extra_args = list(self.settings.get('PANDOC_ARGS', []))

        # The bibliography flags must be added BEFORE extra_args is appended
        # to the command line, otherwise pandoc never sees them.
        if "bibliography" in metadata:
            bib_file = os.path.join(bib_dir, metadata['bibliography'])
            extra_args.append('--bibliography={}'.format(bib_file))
            # NOTE(review): the section title is only added when a
            # bibliography is present; the original nesting could not be
            # recovered from the source -- confirm. The double quotes are
            # passed to pandoc literally (no shell is involved).
            if bib_header is not None:
                extra_args.append(
                    '--metadata=reference-section-title="{}"'.format(
                        bib_header))

        pandoc_cmd = ["pandoc", "--from=markdown" + extensions, "--to=html5"]
        for filt in filters:
            pandoc_cmd.extend(["--filter", filt])
        pandoc_cmd.extend(extra_args)

        proc = subprocess.Popen(
            pandoc_cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE)

        output = proc.communicate(content.encode('utf-8'))[0].decode('utf-8')
        status = proc.wait()
        if status != 0:
            # Report the failure but keep returning whatever pandoc wrote,
            # so a failing document does not abort the caller.
            logging.getLogger(__name__).warning(
                "pandoc exited with status %d while converting %s",
                status, filename)

        # Just in case, let's make sure we don't lose Pelican template
        # parameters: undo the percent-encoding of the braces.
        output = output.replace('%7Battach%7D', '{attach}')\
                       .replace('%7Bfilename%7D', '{filename}')\
                       .replace('%7Btag%7D', '{tag}')\
                       .replace('%7Bcategory%7D', '{category}')

        return output, metadata
2014-08-27 07:41:47 +00:00
2015-05-16 18:13:47 +00:00
2014-03-26 10:35:27 +00:00
def add_reader(readers):
    """Map every extension handled by PandocReader to the PandocReader class.

    :param readers: object exposing a ``reader_classes`` mapping that is
        updated in place, one entry per file extension.
    """
    reader_cls = PandocReader
    for extension in reader_cls.file_extensions:
        readers.reader_classes[extension] = reader_cls
2014-08-27 07:41:47 +00:00
2015-05-16 18:13:47 +00:00
2014-03-26 10:35:27 +00:00
def register():
    """Connect ``add_reader`` to the ``readers_init`` signal."""
    readers_init = signals.readers_init
    readers_init.connect(add_reader)