Merge pull request #2 from zackw/master

I very much like what you have done with the code. I agree that it is nice to be able to declare pandoc extensions. On the other hand, I initially disapproved of taking out pypandoc. However, after taking a closer look at the source of pypandoc, I came to realize that in this case we do not benefit much from using an abstraction layer. Your code works nicely with my setup, with the exception that you removed the change of the cwd. However, as pandoc_reader is still a very young project, I think we can afford to break compatibility. Thank you very much for your contribution!
2014-12-06 14:22:47 +01:00
parent 1dceb54a01 40d157895c
commit 70b2c1d1cc
2 changed files with 35 additions and 25 deletions
--- a/README.md
+++ b/README.md
@@ -7,7 +7,6 @@ A pandoc [markdown] reader plugin for [pelican]
 Requirements
 ------------

-  - [pypandoc]
  - [pandoc] in $PATH


@@ -30,6 +29,13 @@ Additional command line parameters can be passed to pandoc via the PANDOC_ARGS p
      '--number-sections',
    ]

+Pandoc's markdown extensions can be enabled or disabled via the
+PANDOC_EXTENSIONS parameter.
+
+    PANDOC_EXTENSIONS = [
+      '+hard_line_breaks',
+      '-citations'
+    ]

 Contributing
 ------------
@@ -44,4 +50,3 @@ Contributing
 [markdown]: http://daringfireball.net/projects/markdown/
 [pandoc]: http://johnmacfarlane.net/pandoc/
 [pelican]: http://getpelican.com
-[pypandoc]: https://github.com/bebraw/pypandoc
--- a/pandoc_reader.py
+++ b/pandoc_reader.py
@@ -1,43 +1,48 @@
-import os
+import subprocess
+
 from pelican import signals
 from pelican.readers import BaseReader
 from pelican.utils import pelican_open
-import pypandoc
-

 class PandocReader(BaseReader):
    enabled = True
    file_extensions = ['md', 'markdown', 'mkd', 'mdown']

    def read(self, filename):
-        with pelican_open(filename) as text:
-            metadata_items = []
-            in_content = False
-            MD = ''
-            for line in text.splitlines():
-                splitted = line.split(':', 1)
-                if len(splitted) == 2 and not in_content:
-                    metadata_items.append(splitted)
-                else:
-                    in_content = True
-                    MD += line + '\n'
+        with pelican_open(filename) as fp:
+            text = list(fp.splitlines())

        metadata = {}
-            for name, value in metadata_items:
-                name = name.lower()
-                value = value.strip()
+        for i, line in enumerate(text):
+            kv = line.split(':', 1)
+            if len(kv) == 2:
+                name, value = kv[0].lower(), kv[1].strip()
                metadata[name] = self.process_metadata(name, value)
+            else:
+                content = "\n".join(text[i:])
+                break

-        os.chdir(self.settings['PATH']) # change the cwd to the content dir
-        if not 'PANDOC_ARGS' in self.settings: self.settings['PANDOC_ARGS'] = []
-        output = pypandoc.convert(MD, 'html5', format='md', extra_args=self.settings['PANDOC_ARGS'])
+        extra_args = self.settings.get('PANDOC_ARGS', [])
+        extensions = self.settings.get('PANDOC_EXTENSIONS', '')
+        if isinstance(extensions, list):
+            extensions = ''.join(extensions)
+
+        pandoc_cmd = ["pandoc", "--from=markdown" + extensions, "--to=html5"]
+        pandoc_cmd.extend(extra_args)
+
+        proc = subprocess.Popen(pandoc_cmd,
+                                stdin = subprocess.PIPE,
+                                stdout = subprocess.PIPE)
+
+        output = proc.communicate(content.encode('utf-8'))[0].decode('utf-8')
+        status = proc.wait()
+        if status:
+            raise subprocess.CalledProcessError(status, pandoc_cmd)

        return output, metadata

-
 def add_reader(readers):
    readers.reader_classes['md'] = PandocReader

-
 def register():
    signals.readers_init.connect(add_reader)