Fixes and enhancements including:

* Add PANDOC_EXTENSIONS configuration variable, allowing one to enable or disable Pandoc's markdown extensions individually.
* Remove dependency on pypandoc.
* Don't change the working directory.
* More efficient metadata extraction.
2014-11-26 13:42:37 -05:00
parent 1dceb54a01
commit 40d157895c
2 changed files with 35 additions and 25 deletions
--- a/README.md
+++ b/README.md
@@ -7,7 +7,6 @@ A pandoc [markdown] reader plugin for [pelican]
 Requirements
 ------------

-  - [pypandoc]
  - [pandoc] in $PATH


@@ -30,6 +29,13 @@ Additional command line parameters can be passed to pandoc via the PANDOC_ARGS p
      '--number-sections',
    ]

+Pandoc's markdown extensions can be enabled or disabled via the
+PANDOC_EXTENSIONS parameter.
+
+    PANDOC_EXTENSIONS = [
+      '+hard_line_breaks',
+      '-citations'
+    ]

 Contributing
 ------------
@@ -44,4 +50,3 @@ Contributing
 [markdown]: http://daringfireball.net/projects/markdown/
 [pandoc]: http://johnmacfarlane.net/pandoc/
 [pelican]: http://getpelican.com
-[pypandoc]: https://github.com/bebraw/pypandoc
--- a/pandoc_reader.py
+++ b/pandoc_reader.py
@@ -1,43 +1,48 @@
-import os
+import subprocess
+
 from pelican import signals
 from pelican.readers import BaseReader
 from pelican.utils import pelican_open
-import pypandoc
-

 class PandocReader(BaseReader):
    enabled = True
    file_extensions = ['md', 'markdown', 'mkd', 'mdown']

    def read(self, filename):
-        with pelican_open(filename) as text:
-            metadata_items = []
-            in_content = False
-            MD = ''
-            for line in text.splitlines():
-                splitted = line.split(':', 1)
-                if len(splitted) == 2 and not in_content:
-                    metadata_items.append(splitted)
-                else:
-                    in_content = True
-                    MD += line + '\n'
+        with pelican_open(filename) as fp:
+            text = list(fp.splitlines())

        metadata = {}
-            for name, value in metadata_items:
-                name = name.lower()
-                value = value.strip()
+        for i, line in enumerate(text):
+            kv = line.split(':', 1)
+            if len(kv) == 2:
+                name, value = kv[0].lower(), kv[1].strip()
                metadata[name] = self.process_metadata(name, value)
+            else:
+                content = "\n".join(text[i:])
+                break

-        os.chdir(self.settings['PATH']) # change the cwd to the content dir
-        if not 'PANDOC_ARGS' in self.settings: self.settings['PANDOC_ARGS'] = []
-        output = pypandoc.convert(MD, 'html5', format='md', extra_args=self.settings['PANDOC_ARGS'])
+        extra_args = self.settings.get('PANDOC_ARGS', [])
+        extensions = self.settings.get('PANDOC_EXTENSIONS', '')
+        if isinstance(extensions, list):
+            extensions = ''.join(extensions)
+
+        pandoc_cmd = ["pandoc", "--from=markdown" + extensions, "--to=html5"]
+        pandoc_cmd.extend(extra_args)
+
+        proc = subprocess.Popen(pandoc_cmd,
+                                stdin = subprocess.PIPE,
+                                stdout = subprocess.PIPE)
+
+        output = proc.communicate(content.encode('utf-8'))[0].decode('utf-8')
+        status = proc.wait()
+        if status:
+            raise subprocess.CalledProcessError(status, pandoc_cmd)

        return output, metadata

-
 def add_reader(readers):
    readers.reader_classes['md'] = PandocReader

-
 def register():
    signals.readers_init.connect(add_reader)