Merge pull request #2 from zackw/master

liob · liob · commit 70b2c1d1cca5 · 2014-12-06T14:22:47.000+01:00
I very much like what you have done with the code. I agree that it is nice to be able to declare pandoc extensions.

On the other hand, I initially disapproved of removing pypandoc. However, after taking a closer look at the pypandoc source, I came to realize that in this case we do not benefit much from using an abstraction layer.

Your code works nicely with my setup, with the exception that you removed the change of the cwd (the `os.chdir` to the content directory). However, as pandoc_reader is still a very young project, I think we can afford to break compatibility.

Thank you very much for your contribution!
diff --git a/README.md b/README.md
@@ -7,7 +7,6 @@ A pandoc [markdown] reader plugin for [pelican]
 Requirements
 ------------
 
-  - [pypandoc]
   - [pandoc] in $PATH
 
 
@@ -30,6 +29,13 @@ Additional command line parameters can be passed to pandoc via the PANDOC_ARGS p
       '--number-sections',
     ]
 
+Pandoc's markdown extensions can be enabled or disabled via the
+PANDOC_EXTENSIONS parameter.
+
+    PANDOC_EXTENSIONS = [
+      '+hard_line_breaks',
+      '-citations'
+    ]
 
 Contributing
 ------------
@@ -44,4 +50,3 @@ Contributing
 [markdown]: http://daringfireball.net/projects/markdown/
 [pandoc]: http://johnmacfarlane.net/pandoc/
 [pelican]: http://getpelican.com
-[pypandoc]: https://github.com/bebraw/pypandoc
diff --git a/pandoc_reader.py b/pandoc_reader.py
@@ -1,43 +1,48 @@
-import os
+import subprocess
+
 from pelican import signals
 from pelican.readers import BaseReader
 from pelican.utils import pelican_open
-import pypandoc
-
 
 class PandocReader(BaseReader):
     enabled = True
     file_extensions = ['md', 'markdown', 'mkd', 'mdown']
 
     def read(self, filename):
-        with pelican_open(filename) as text:
-            metadata_items = []
-            in_content = False
-            MD = ''
-            for line in text.splitlines():
-                splitted = line.split(':', 1)
-                if len(splitted) == 2 and not in_content:
-                    metadata_items.append(splitted)
-                else:
-                    in_content = True
-                    MD += line + '\n'
-
-            metadata = {}
-            for name, value in metadata_items:
-                name = name.lower()
-                value = value.strip()
+        with pelican_open(filename) as fp:
+            text = list(fp.splitlines())
+
+        metadata = {}
+        for i, line in enumerate(text):
+            kv = line.split(':', 1)
+            if len(kv) == 2:
+                name, value = kv[0].lower(), kv[1].strip()
                 metadata[name] = self.process_metadata(name, value)
+            else:
+                content = "\n".join(text[i:])
+                break
 
-        os.chdir(self.settings['PATH']) # change the cwd to the content dir
-        if not 'PANDOC_ARGS' in self.settings: self.settings['PANDOC_ARGS'] = []
-        output = pypandoc.convert(MD, 'html5', format='md', extra_args=self.settings['PANDOC_ARGS'])
+        extra_args = self.settings.get('PANDOC_ARGS', [])
+        extensions = self.settings.get('PANDOC_EXTENSIONS', '')
+        if isinstance(extensions, list):
+            extensions = ''.join(extensions)
 
-        return output, metadata
+        pandoc_cmd = ["pandoc", "--from=markdown" + extensions, "--to=html5"]
+        pandoc_cmd.extend(extra_args)
+
+        proc = subprocess.Popen(pandoc_cmd,
+                                stdin = subprocess.PIPE,
+                                stdout = subprocess.PIPE)
 
+        output = proc.communicate(content.encode('utf-8'))[0].decode('utf-8')
+        status = proc.wait()
+        if status:
+            raise subprocess.CalledProcessError(status, pandoc_cmd)
+
+        return output, metadata
 
 def add_reader(readers):
     readers.reader_classes['md'] = PandocReader
 
-
 def register():
     signals.readers_init.connect(add_reader)