Add support for parsing YAML metadata.

2015-05-16 14:13:47 -04:00
parent 9ef0197eed
commit e078420af2
2 changed files with 75 additions and 12 deletions
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ Requirements
 ------------
  - [pandoc] in $PATH
-
+  - [PyYAML] installed if you want to parse [YAML metadata]
 Installation
 ------------
@@ -37,6 +37,28 @@ PANDOC_EXTENSIONS parameter.
      '-citations'
    ]
 YAML Metadata
 -------------
 No configuration is required to use YAML metadata. Simply place it at the top
 of your post, opened by a `---` line and closed by a `---` or `...` line. If
 PyYAML is not installed, the data will be parsed by the normal metadata parser
 instead.
 For example:
    ---
    title: Using YAML with Pandoc!
    author: Your Name
    date: 2015-05-15 14:07
    description: >
        You can include long, multiline descriptions which
        can wrap across multiple lines (and will be joined
        by YAML).
    complex:
        - or complex data structures
        - like lists
    ...
 Contributing
 ------------
@@ -50,3 +72,5 @@ Contributing
 [markdown]: http://daringfireball.net/projects/markdown/
 [pandoc]: http://johnmacfarlane.net/pandoc/
 [pelican]: http://getpelican.com
 [PyYAML]: http://pyyaml.org/
 [YAML metadata]: http://pandoc.org/README.html#extension-yaml_metadata_block
--- a/pandoc_reader.py
+++ b/pandoc_reader.py
@@ -1,25 +1,62 @@
 import subprocess
 from pelican import signals
 from pelican.readers import BaseReader
 from pelican.utils import pelican_open
 try:
    import yaml
 except ImportError:
    yaml = None
 class PandocReader(BaseReader):
    enabled = True
    file_extensions = ['md', 'markdown', 'mkd', 'mdown']
    def _get_meta_and_content(self, text):
        """Split a post into its metadata and its body.

        ``text`` is the post as a list of lines without trailing newlines.

        If the first line is ``---`` and PyYAML could be imported, the lines
        up to the closing ``---`` or ``...`` line are parsed as a YAML
        mapping. In every other case (no YAML header, PyYAML missing, or a
        header that is never closed) the leading ``key: value`` lines are
        read as metadata until the first line that contains no colon.

        Returns a ``(metadata, content)`` tuple. ``metadata`` maps lower-cased
        names to the result of ``self.process_metadata(name, value)``, where
        ``value`` is the stripped string form of the raw value. ``content``
        is the remaining lines joined with newlines, or ``""`` when nothing
        remains.

        Errors raised by the YAML parser on a malformed block propagate to
        the caller.
        """
        metadata = {}
        # Default body, so an empty file or a file made only of metadata
        # lines yields "" instead of hitting an unbound local at return.
        content = ""

        # `text and ...` guards the empty file; `yaml` is only consulted when
        # the post actually opens with a YAML header.
        if text and text[0] == '---' and yaml is not None:
            # Index of the closing terminator, skipping the opening `---`.
            end = next(
                (i for i, line in enumerate(text)
                 if i and line in ('---', '...')),
                None,
            )
            if end is not None:
                # safe_load builds only plain Python objects; a bare
                # yaml.load can instantiate arbitrary objects from tags and
                # requires an explicit Loader on recent PyYAML releases.
                parsed = yaml.safe_load("\n".join(text[1:end]))
                # An empty block loads as None and a scalar or list document
                # has no named fields: both mean "no metadata".
                if isinstance(parsed, dict):
                    for key, raw in parsed.items():
                        # YAML keys need not be strings (e.g. `2015: x`).
                        name = str(key).lower()
                        value = str(raw).strip()
                        metadata[name] = self.process_metadata(name, value)
                # The terminator itself is not part of the body.
                return metadata, "\n".join(text[end + 1:])
            # No terminator: this is not a metadata block, so fall through
            # and let the plain parser treat the text as ordinary content.

        for i, line in enumerate(text):
            kv = line.split(':', 1)
            if len(kv) != 2:
                # First line without a colon starts the body (inclusive).
                content = "\n".join(text[i:])
                break
            name, value = kv[0].lower(), kv[1].strip()
            metadata[name] = self.process_metadata(name, value)
        return metadata, content
    def read(self, filename):
        with pelican_open(filename) as fp:
            text = list(fp.splitlines())
-        metadata = {}
+        metadata, content = self._get_meta_and_content(text)
        for i, line in enumerate(text):
            kv = line.split(':', 1)
            if len(kv) == 2:
                name, value = kv[0].lower(), kv[1].strip()
                metadata[name] = self.process_metadata(name, value)
            else:
                content = "\n".join(text[i:])
                break
        extra_args = self.settings.get('PANDOC_ARGS', [])
        extensions = self.settings.get('PANDOC_EXTENSIONS', '')
@@ -30,8 +67,8 @@ class PandocReader(BaseReader):
        pandoc_cmd.extend(extra_args)
        proc = subprocess.Popen(pandoc_cmd,
-                                stdin = subprocess.PIPE,
+                                stdin=subprocess.PIPE,
-                                stdout = subprocess.PIPE)
+                                stdout=subprocess.PIPE)
        output = proc.communicate(content.encode('utf-8'))[0].decode('utf-8')
        status = proc.wait()
@@ -40,9 +77,11 @@ class PandocReader(BaseReader):
        return output, metadata
 def add_reader(readers):
    """Point every extension PandocReader handles at the PandocReader class.

    ``readers`` must expose a ``reader_classes`` mapping that accepts item
    assignment keyed by file extension.
    """
    reader_cls = PandocReader
    for extension in reader_cls.file_extensions:
        readers.reader_classes[extension] = reader_cls
 def register():
    """Connect ``add_reader`` to pelican's ``readers_init`` signal."""
    init_signal = signals.readers_init
    init_signal.connect(add_reader)