2016-08-05 02:52:50 +00:00
|
|
|
import logging
|
2014-11-26 18:42:37 +00:00
|
|
|
import subprocess
|
2015-05-16 18:13:47 +00:00
|
|
|
|
2014-03-26 10:35:27 +00:00
|
|
|
from pelican import signals
|
|
|
|
from pelican.readers import BaseReader
|
2014-03-30 14:02:53 +00:00
|
|
|
from pelican.utils import pelican_open
|
2019-02-22 10:02:27 +00:00
|
|
|
import os
|
2014-08-27 07:41:47 +00:00
|
|
|
|
2015-05-16 18:13:47 +00:00
|
|
|
# PyYAML is an optional dependency: when it cannot be imported, ``yaml`` is
# bound to None so later code can test for it instead of failing at import.
try:
    import yaml
except ImportError:
    logging.warning("YAML is not installed; the YAML reader will not work.")
    yaml = None
|
2015-05-16 18:13:47 +00:00
|
|
|
|
|
|
|
|
2014-08-27 07:41:47 +00:00
|
|
|
class PandocReader(BaseReader):
    """Reader that converts Markdown sources to HTML5 by piping them to pandoc.

    Metadata comes either from a leading YAML block (first line is ``---`` and
    PyYAML is importable) or from leading ``key: value`` lines.
    """

    enabled = True
    file_extensions = ['md', 'markdown', 'mkd', 'mdown']

    def _get_meta_and_content(self, text):
        """Split source lines into metadata and the text handed to pandoc.

        Args:
            text: list of source lines without line terminators.

        Returns:
            A ``(metadata, content)`` tuple. ``metadata`` maps lower-cased
            names to the result of ``self.process_metadata``; ``content`` is
            a single newline-joined string (empty when nothing is left).
        """
        metadata = {}
        # Default so that a file made only of ``key: value`` lines (or an
        # empty file) yields empty content instead of an unbound local.
        content = ""

        # ``bool(text)`` guards the index: an empty file has no first line.
        use_yaml = bool(text) and text[0] == '---' and yaml is not None
        if use_yaml:
            # Collect the header lines up to, but excluding, the terminator
            # (`---` or `...`).
            to_parse = []
            for line in text[1:]:
                if line in ('---', '...'):
                    break
                to_parse.append(line)

            # safe_load: the header comes from content files, so arbitrary
            # object construction must not be possible; it also works on
            # PyYAML releases where ``yaml.load`` requires a Loader argument.
            parsed = yaml.safe_load("\n".join(to_parse))
            # An empty header parses to None and a malformed one may parse to
            # a scalar or list; neither carries usable key/value metadata.
            if not isinstance(parsed, dict):
                parsed = {}

            # Postprocess to make the data usable by Pelican.
            for key, value in parsed.items():
                # YAML keys are not necessarily strings (e.g. ``2019: x``).
                name = str(key).lower()
                metadata[name] = self.process_metadata(name, value)

            # The whole text, header included, is passed on as content.
            content = "\n".join(text)
        else:
            for i, line in enumerate(text):
                kv = line.split(':', 1)
                if len(kv) != 2:
                    # First line without a colon ends the metadata; it and
                    # everything after it is the content.
                    content = "\n".join(text[i:])
                    break
                name, value = kv[0].lower(), kv[1].strip()
                metadata[name] = self.process_metadata(name, value)

        return metadata, content

    def read(self, filename):
        """Render ``filename`` with pandoc.

        Settings read from ``self.settings``: ``PANDOC_ARGS`` (extra command
        line arguments), ``PANDOC_EXTENSIONS`` (string or list of strings
        appended to ``--from=markdown``), ``PANDOC_BIBDIR`` (directory joined
        with the ``bibliography`` metadata value) and ``PANDOC_BIBHEADER``
        (title for the reference section).

        Args:
            filename: path of the source file.

        Returns:
            An ``(output, metadata)`` tuple: the HTML produced by pandoc and
            the metadata dictionary.
        """
        with pelican_open(filename) as fp:
            text = fp.splitlines()

        metadata, content = self._get_meta_and_content(text)

        bib_dir = self.settings.get('PANDOC_BIBDIR', '')
        bib_header = self.settings.get('PANDOC_BIBHEADER', None)
        # Copy so the configured value is never mutated and so a tuple in the
        # settings can still be extended below.
        extra_args = list(self.settings.get('PANDOC_ARGS', []))
        extensions = self.settings.get('PANDOC_EXTENSIONS', '')
        if isinstance(extensions, list):
            extensions = ''.join(extensions)

        # Bibliography options must be added BEFORE the command line is
        # assembled, otherwise they never reach pandoc.
        if "bibliography" in metadata:
            bib_file = os.path.join(bib_dir, metadata['bibliography'])
            extra_args.append('--bibliography={}'.format(bib_file))

            if bib_header is not None:
                # NOTE(review): no shell is involved, so the double quotes are
                # passed to pandoc literally -- confirm that is intended.
                extra_args.append(
                    '--metadata=reference-section-title="{}"'.format(
                        bib_header))

        pandoc_cmd = ["pandoc", "--from=markdown" + extensions, "--to=html5"]
        pandoc_cmd.extend(extra_args)

        proc = subprocess.Popen(pandoc_cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)

        output = proc.communicate(content.encode('utf-8'))[0].decode('utf-8')
        status = proc.wait()
        if status:
            # Keep the best-effort behaviour (still return what was produced)
            # but do not hide the failure.
            logging.warning("pandoc exited with status %s while reading %s",
                            status, filename)

        # Just in case, let's make sure we don't lose Pelican template
        # parameters.
        output = output.replace('%7Battach%7D', '{attach}')\
                       .replace('%7Bfilename%7D', '{filename}')\
                       .replace('%7Btag%7D', '{tag}')\
                       .replace('%7Bcategory%7D', '{category}')

        return output, metadata
|
2014-08-27 07:41:47 +00:00
|
|
|
|
2015-05-16 18:13:47 +00:00
|
|
|
|
2014-03-26 10:35:27 +00:00
|
|
|
def add_reader(readers):
    """Map every extension in ``PandocReader.file_extensions`` to PandocReader.

    Args:
        readers: object whose ``reader_classes`` mapping is updated in place.
    """
    registry = readers.reader_classes
    for extension in PandocReader.file_extensions:
        registry[extension] = PandocReader
|
2014-08-27 07:41:47 +00:00
|
|
|
|
2015-05-16 18:13:47 +00:00
|
|
|
|
2014-03-26 10:35:27 +00:00
|
|
|
def register():
    """Connect ``add_reader`` to Pelican's ``readers_init`` signal."""
    readers_init = signals.readers_init
    readers_init.connect(add_reader)
|