Add support for parsing YAML metadata.

This commit is contained in:
Chris Krycho 2015-05-16 14:13:47 -04:00
parent 9ef0197eed
commit e078420af2
2 changed files with 75 additions and 12 deletions

View File

@ -8,7 +8,7 @@ Requirements
------------ ------------
- [pandoc] in $PATH - [pandoc] in $PATH
- [PyYAML] installed if you want to parse [YAML metadata]
Installation Installation
------------ ------------
@ -37,6 +37,28 @@ PANDOC_EXTENSIONS parameter.
'-citations' '-citations'
] ]
YAML Metadata
-------------
No configuration is required to use YAML metadata. Simply place the block at the
very top of your post, opening it with `---` and closing it with `---` or `...`.
If PyYAML is not installed, the post will be parsed by the normal metadata parser instead.
For example:
---
title: Using YAML with Pandoc!
author: Your Name
date: 2015-05-15 14:07
description: >
You can include long, multiline descriptions which
can wrap across multiple lines (and will be joined
by YAML).
complex:
- or complex data structures
- like lists
...
Contributing Contributing
------------ ------------
@ -50,3 +72,5 @@ Contributing
[markdown]: http://daringfireball.net/projects/markdown/ [markdown]: http://daringfireball.net/projects/markdown/
[pandoc]: http://johnmacfarlane.net/pandoc/ [pandoc]: http://johnmacfarlane.net/pandoc/
[pelican]: http://getpelican.com [pelican]: http://getpelican.com
[PyYAML]: http://pyyaml.org/
[YAML metadata]: http://pandoc.org/README.html#extension-yaml_metadata_block

View File

@ -1,25 +1,62 @@
import subprocess import subprocess
from pelican import signals from pelican import signals
from pelican.readers import BaseReader from pelican.readers import BaseReader
from pelican.utils import pelican_open from pelican.utils import pelican_open
try:
import yaml
except ImportError:
yaml = None
class PandocReader(BaseReader): class PandocReader(BaseReader):
enabled = True enabled = True
file_extensions = ['md', 'markdown', 'mkd', 'mdown'] file_extensions = ['md', 'markdown', 'mkd', 'mdown']
def _get_meta_and_content(self, text):
    """Split a post into its metadata and the body handed to pandoc.

    Two header formats are understood:

    * A YAML block: the first line is ``---`` and the block is closed by
      a later ``---`` or ``...`` line. Used only when PyYAML could be
      imported (module-level ``yaml`` is not ``None``).
    * The plain format: leading ``key: value`` lines, ended by the first
      line that contains no colon.

    Args:
        text: The post as a list of lines without line endings.

    Returns:
        A ``(metadata, content)`` tuple. ``metadata`` maps lower-cased
        keys to whatever ``self.process_metadata`` returns for them;
        ``content`` is the remaining lines joined with newlines, or an
        empty string when nothing follows the metadata.
    """
    metadata = {}

    # Checking `text` first keeps an empty file from raising IndexError.
    if text and text[0] == '---' and yaml is not None:
        # The block ends at the first terminator after the opening line.
        end = next(
            (i for i, line in enumerate(text[1:], 1)
             if line in ('---', '...')),
            None,
        )
        if end is not None:
            # safe_load only builds plain Python objects; post files
            # should not be able to instantiate arbitrary classes, and
            # newer PyYAML rejects load() without an explicit Loader.
            parsed = yaml.safe_load("\n".join(text[1:end]))
            # An empty block parses to None and a bare scalar or list is
            # not a mapping; neither carries any key/value metadata.
            if isinstance(parsed, dict):
                for key, raw in parsed.items():
                    # YAML keys and values may be non-strings (ints,
                    # dates, lists); stringify them before handing
                    # them to process_metadata.
                    name = str(key).lower()
                    value = str(raw).strip()
                    metadata[name] = self.process_metadata(name, value)
            # The terminator line itself is not part of the content.
            return metadata, "\n".join(text[end + 1:])
        # No terminator: this is not a metadata block, so fall through
        # and let the plain parser treat the text as ordinary content.

    # Default covers files that consist solely of metadata lines.
    content = ""
    for i, line in enumerate(text):
        kv = line.split(':', 1)
        if len(kv) != 2:
            # First line without a colon: the body starts here.
            content = "\n".join(text[i:])
            break
        name, value = kv[0].lower(), kv[1].strip()
        metadata[name] = self.process_metadata(name, value)

    return metadata, content
def read(self, filename): def read(self, filename):
with pelican_open(filename) as fp: with pelican_open(filename) as fp:
text = list(fp.splitlines()) text = list(fp.splitlines())
metadata = {} metadata, content = self._get_meta_and_content(text)
for i, line in enumerate(text):
kv = line.split(':', 1)
if len(kv) == 2:
name, value = kv[0].lower(), kv[1].strip()
metadata[name] = self.process_metadata(name, value)
else:
content = "\n".join(text[i:])
break
extra_args = self.settings.get('PANDOC_ARGS', []) extra_args = self.settings.get('PANDOC_ARGS', [])
extensions = self.settings.get('PANDOC_EXTENSIONS', '') extensions = self.settings.get('PANDOC_EXTENSIONS', '')
@ -30,8 +67,8 @@ class PandocReader(BaseReader):
pandoc_cmd.extend(extra_args) pandoc_cmd.extend(extra_args)
proc = subprocess.Popen(pandoc_cmd, proc = subprocess.Popen(pandoc_cmd,
stdin = subprocess.PIPE, stdin=subprocess.PIPE,
stdout = subprocess.PIPE) stdout=subprocess.PIPE)
output = proc.communicate(content.encode('utf-8'))[0].decode('utf-8') output = proc.communicate(content.encode('utf-8'))[0].decode('utf-8')
status = proc.wait() status = proc.wait()
@ -40,9 +77,11 @@ class PandocReader(BaseReader):
return output, metadata return output, metadata
def add_reader(readers):
    """Map every extension PandocReader handles to the PandocReader class.

    Connected to Pelican's ``readers_init`` signal by ``register``.
    """
    registry = readers.reader_classes
    for extension in PandocReader.file_extensions:
        registry[extension] = PandocReader
def register():
    """Plugin entry point: hook ``add_reader`` onto ``readers_init``."""
    readers_init = signals.readers_init
    readers_init.connect(add_reader)