From e078420af2e23ef74eeb06fbe83e3a170a380a1b Mon Sep 17 00:00:00 2001 From: Chris Krycho Date: Sat, 16 May 2015 14:13:47 -0400 Subject: [PATCH] Add support for parsing YAML metadata. --- README.md | 26 ++++++++++++++++++++- pandoc_reader.py | 61 +++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 75 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 08055fc..057b456 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Requirements ------------ - [pandoc] in $PATH - + - [PyYAML] installed if you want to parse [YAML metadata] Installation ------------ @@ -37,6 +37,28 @@ PANDOC_EXTENSIONS parameter. '-citations' ] + +YAML Metadata +------------- + +No configuration is required to use YAML metadata. Simply include it at the top +of your post, started by `---` and terminated by `---` or `...`. If PyYAML is +not installed, the data will be parsed by the normal metadata parser instead. +For example: + + --- + title: Using YAML with Pandoc! + author: Your Name + date: 2015-05-15 14:07 + description: > + You can include long, multiline descriptions which + can wrap across multiple lines (and will be joined + by YAML). + complex: + - or complex data structures + - like lists + ... + Contributing ------------ @@ -50,3 +72,5 @@ Contributing [markdown]: http://daringfireball.net/projects/markdown/ [pandoc]: http://johnmacfarlane.net/pandoc/ [pelican]: http://getpelican.com +[PyYAML]: http://pyyaml.org/ +[YAML metadata]: http://pandoc.org/README.html#extension-yaml_metadata_block \ No newline at end of file diff --git a/pandoc_reader.py b/pandoc_reader.py index 87c7735..f4e5322 100644 --- a/pandoc_reader.py +++ b/pandoc_reader.py @@ -1,25 +1,62 @@ import subprocess + from pelican import signals from pelican.readers import BaseReader from pelican.utils import pelican_open +try: + import yaml +except ImportError: + yaml = None + + class PandocReader(BaseReader): enabled = True file_extensions = ['md', 'markdown', 'mkd', 'mdown'] + def _get_meta_and_content(self, text): + metadata = {} + + use_YAML = text[0] == '---' and yaml is not None + if use_YAML: + # Load the data we need to parse + to_parse = [] + text = text[1:] + for i, line in enumerate(text): + # When we find a terminator (`---` or `...`), stop. + if line == '---' or line == '...': + # Do not include the terminator itself. + content = "\n".join(text[i+1:]) + break + + # Otherwise, just keep adding the lines to the parseable. + to_parse.append(line) + + to_parse = "\n".join(to_parse) + parsed = yaml.load(to_parse) + + # Postprocess to make the data usable by Pelican. + for k in parsed: + name, value = k.lower(), str(parsed[k]).strip() + metadata[name] = self.process_metadata(name, value) + + else: + for i, line in enumerate(text): + kv = line.split(':', 1) + if len(kv) == 2: + name, value = kv[0].lower(), kv[1].strip() + metadata[name] = self.process_metadata(name, value) + else: + content = "\n".join(text[i:]) + break + + return metadata, content + def read(self, filename): with pelican_open(filename) as fp: text = list(fp.splitlines()) - metadata = {} - for i, line in enumerate(text): - kv = line.split(':', 1) - if len(kv) == 2: - name, value = kv[0].lower(), kv[1].strip() - metadata[name] = self.process_metadata(name, value) - else: - content = "\n".join(text[i:]) - break + metadata, content = self._get_meta_and_content(text) extra_args = self.settings.get('PANDOC_ARGS', []) extensions = self.settings.get('PANDOC_EXTENSIONS', '') @@ -30,8 +67,8 @@ class PandocReader(BaseReader): pandoc_cmd.extend(extra_args) proc = subprocess.Popen(pandoc_cmd, - stdin = subprocess.PIPE, - stdout = subprocess.PIPE) + stdin=subprocess.PIPE, + stdout=subprocess.PIPE) output = proc.communicate(content.encode('utf-8'))[0].decode('utf-8') status = proc.wait() @@ -40,9 +77,11 @@ class PandocReader(BaseReader): return output, metadata + def add_reader(readers): for ext in PandocReader.file_extensions: readers.reader_classes[ext] = PandocReader + def register(): signals.readers_init.connect(add_reader)