Add support for parsing YAML metadata.

Chris Krycho 2015-05-16 14:13:47 -04:00
parent 9ef0197eed
commit e078420af2
2 changed files with 75 additions and 12 deletions


@@ -8,7 +8,7 @@ Requirements
------------
- [pandoc] in $PATH
- [PyYAML] installed if you want to parse [YAML metadata]
Installation
------------
@@ -37,6 +37,28 @@ PANDOC_EXTENSIONS parameter.
        '-citations'
    ]
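The two lines above are the tail of the `PANDOC_EXTENSIONS` example in the
existing README. For context, a minimal `pelicanconf.py` using these settings
might look like the sketch below; `PANDOC_ARGS` and `PANDOC_EXTENSIONS` are the
names the reader code queries, while the `PLUGINS` entry and the specific
values are illustrative assumptions:

```python
# Sketch of a pelicanconf.py snippet; values are examples, not requirements.
PLUGINS = ['pandoc_reader']   # assumes the plugin directory is named pandoc_reader

PANDOC_ARGS = [
    '--mathjax',
    '--smart',
]

PANDOC_EXTENSIONS = [
    '+hard_line_breaks',
    '-citations',
]
```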
YAML Metadata
-------------
No configuration is required to use YAML metadata. Simply include it at the top
of your post, opening with `---` and closing with `---` or `...`. If PyYAML is
not installed, the metadata will be parsed by the normal metadata parser instead.
For example:
    ---
    title: Using YAML with Pandoc!
    author: Your Name
    date: 2015-05-15 14:07
    description: >
        You can include long, multiline descriptions which
        can wrap across multiple lines (and will be joined
        by YAML).
    complex:
        - or complex data structures
        - like lists
    ...
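When PyYAML is not available, the reader falls back to plain `key: value`
parsing: it reads colon-separated lines from the top of the file and treats
everything from the first line without a colon onward as content. A
hypothetical post written directly in that plain form (no `---` delimiters)
might begin:

    title: Using YAML with Pandoc!
    author: Your Name
    date: 2015-05-15 14:07

    The body of the post begins at the first line without a colon character.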
Contributing
------------
@@ -50,3 +72,5 @@ Contributing
[markdown]: http://daringfireball.net/projects/markdown/
[pandoc]: http://johnmacfarlane.net/pandoc/
[pelican]: http://getpelican.com
[PyYAML]: http://pyyaml.org/
[YAML metadata]: http://pandoc.org/README.html#extension-yaml_metadata_block


@@ -1,25 +1,62 @@
import subprocess
from pelican import signals
from pelican.readers import BaseReader
from pelican.utils import pelican_open
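
# PyYAML is optional: if it cannot be imported, metadata is read by the
# plain key/value parser in _get_meta_and_content below.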
try:
    import yaml
except ImportError:
    yaml = None
class PandocReader(BaseReader):
    enabled = True
    file_extensions = ['md', 'markdown', 'mkd', 'mdown']
    def _get_meta_and_content(self, text):
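        # Split the post's lines into (metadata, content): metadata is a dict
        # whose values have been run through Pelican's process_metadata();
        # content is the remainder of the file joined back into one string.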
        metadata = {}

        use_YAML = text[0] == '---' and yaml is not None
        if use_YAML:
            # Load the data we need to parse
            to_parse = []
            text = text[1:]
            for i, line in enumerate(text):
                # When we find a terminator (`---` or `...`), stop.
                if line == '---' or line == '...':
                    # Do not include the terminator itself.
                    content = "\n".join(text[i+1:])
                    break

                # Otherwise, just keep adding the lines to the parseable.
                to_parse.append(line)

            to_parse = "\n".join(to_parse)
            parsed = yaml.load(to_parse)

            # Postprocess to make the data usable by Pelican.
            for k in parsed:
                name, value = k.lower(), str(parsed[k]).strip()
                metadata[name] = self.process_metadata(name, value)

        else:
            for i, line in enumerate(text):
                kv = line.split(':', 1)
                if len(kv) == 2:
                    name, value = kv[0].lower(), kv[1].strip()
                    metadata[name] = self.process_metadata(name, value)
                else:
                    content = "\n".join(text[i:])
                    break

        return metadata, content
    def read(self, filename):
        with pelican_open(filename) as fp:
            text = list(fp.splitlines())

-        metadata = {}
-        for i, line in enumerate(text):
-            kv = line.split(':', 1)
-            if len(kv) == 2:
-                name, value = kv[0].lower(), kv[1].strip()
-                metadata[name] = self.process_metadata(name, value)
-            else:
-                content = "\n".join(text[i:])
-                break
+        metadata, content = self._get_meta_and_content(text)
        extra_args = self.settings.get('PANDOC_ARGS', [])
        extensions = self.settings.get('PANDOC_EXTENSIONS', '')
@@ -30,8 +67,8 @@ class PandocReader(BaseReader):
        pandoc_cmd.extend(extra_args)

        proc = subprocess.Popen(pandoc_cmd,
-                                stdin = subprocess.PIPE,
-                                stdout = subprocess.PIPE)
+                                stdin=subprocess.PIPE,
+                                stdout=subprocess.PIPE)

        output = proc.communicate(content.encode('utf-8'))[0].decode('utf-8')
        status = proc.wait()
@@ -40,9 +77,11 @@ class PandocReader(BaseReader):
        return output, metadata
def add_reader(readers):
    for ext in PandocReader.file_extensions:
        readers.reader_classes[ext] = PandocReader
def register():
    signals.readers_init.connect(add_reader)
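
To see what the new front-matter handling produces, here is a small standalone
sketch that mirrors the YAML branch of `_get_meta_and_content`. It is not part
of the commit: it assumes PyYAML is installed and substitutes `yaml.safe_load`
for the plugin's `yaml.load`.

```python
# Standalone sketch (not part of this commit) of the YAML branch in
# _get_meta_and_content.
import yaml

lines = [
    '---',
    'title: Using YAML with Pandoc!',
    'author: Your Name',
    '...',
    '',
    'Body text starts here.',
]

metadata, content = {}, ''
if lines[0] == '---':
    to_parse = []
    rest = lines[1:]
    for i, line in enumerate(rest):
        # A bare `---` or `...` line ends the metadata block.
        if line in ('---', '...'):
            content = '\n'.join(rest[i + 1:])
            break
        to_parse.append(line)
    parsed = yaml.safe_load('\n'.join(to_parse))
    metadata = {k.lower(): str(v).strip() for k, v in parsed.items()}

print(metadata)  # {'title': 'Using YAML with Pandoc!', 'author': 'Your Name'}
print(content)   # '\nBody text starts here.'
```

The committed method differs in that every value is also passed through
Pelican's `process_metadata`, which normalizes fields such as dates and tags
into the types Pelican expects.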