# pelican-pandoc-reader/pandoc_reader.py

"""Reader that processes Pandoc Markdown and returns HTML5."""
import json
import math
import os
import shutil
import subprocess

import bs4
from mwc.counter import count_words_in_markdown
from ruamel.yaml import YAML, constructor

from pelican import signals
from pelican.readers import BaseReader
from pelican.utils import pelican_open

# Reading speed used when the READING_SPEED setting is not provided
DEFAULT_READING_SPEED = 200  # Words per minute
# Executable name used when PANDOC_EXECUTABLE_PATH is not configured
DEFAULT_PANDOC_EXECUTABLE = "pandoc"
# Directory that contains this module
DIR_PATH = os.path.dirname(__file__)
# URL-encoded Pelican link placeholders mapped back to their raw spelling;
# applied to the generated HTML so Pelican can still resolve the links
ENCODED_LINKS_TO_RAW_LINKS_MAP = {
    "%7Bstatic%7D": "{static}",
    "%7Battach%7D": "{attach}",
    "%7Bfilename%7D": "{filename}",
}
# File extensions for which PandocReader is registered as the reader
FILE_EXTENSIONS = ["md", "mkd", "mkdn", "mdwn", "mdown", "markdown", "Rmd"]
# File name of the template handed to Pandoc through --template
PANDOC_READER_HTML_TEMPLATE = "pandoc-reader-default.html"
# Oldest supported Pandoc release, split into major and minor components
PANDOC_SUPPORTED_MAJOR_VERSION = 2
PANDOC_SUPPORTED_MINOR_VERSION = 11

# Absolute path of the "templates" directory located next to this module
TEMPLATES_PATH = os.path.abspath(os.path.join(DIR_PATH, "templates"))
# Pandoc options that users may not pass in PANDOC_ARGS or enable in
# defaults files
UNSUPPORTED_ARGUMENTS = ("--standalone", "--self-contained")
# Extensions tried when looking for a bibliography named after the source file
VALID_BIB_EXTENSIONS = ["json", "yaml", "bibtex", "bib"]

# Markdown variants supported in defaults files
# Update as Pandoc adds or removes support for formats
VALID_INPUT_FORMATS = (
    "commonmark",
    "commonmark_x",
    "gfm",
    "markdown",
    "markdown_mmd",
    "markdown_phpextra",
    "markdown_strict",
)
# Output formats accepted in defaults files
VALID_OUTPUT_FORMATS = ("html", "html5")


class PandocReader(BaseReader):
    """Convert files written in Pandoc Markdown to HTML 5.

    The reader invokes the Pandoc executable as a subprocess, then splits
    its output into the HTML body, an optional table of contents and the
    document metadata.
    """

    # NOTE(review): presumably read by Pelican to decide whether this reader
    # may be used -- confirm against BaseReader.
    enabled = True
    # Extensions handled by this reader; add_reader() registers each of them
    file_extensions = FILE_EXTENSIONS

    def read(self, source_path):
        """Convert the Markdown file at ``source_path`` to HTML5.

        Args:
            source_path: Path of the Markdown source file.

        Returns:
            A ``(html, metadata)`` tuple as produced by ``_create_html``.

        Raises:
            Exception: If the Pandoc executable cannot be located, or if the
                version check performed by ``_check_pandoc_version`` fails.
        """
        configured_executable = self.settings.get(
            "PANDOC_EXECUTABLE_PATH", DEFAULT_PANDOC_EXECUTABLE
        )

        # Only a user-supplied value is expanded and made absolute; the
        # default name is left as is so it is looked up on the PATH
        if configured_executable == DEFAULT_PANDOC_EXECUTABLE:
            pandoc_executable = configured_executable
        else:
            pandoc_executable = os.path.abspath(
                os.path.expanduser(configured_executable)
            )

        # Make sure the executable exists and can be run
        located_executable = shutil.which(pandoc_executable)
        if not located_executable:
            raise Exception("Could not find Pandoc. Please install.")

        # Reject Pandoc releases older than 2.11
        self._check_pandoc_version(pandoc_executable)

        # Load the Markdown source
        with pelican_open(source_path) as file_content:
            markdown_text = file_content

        # Convert it and hand back the HTML together with its metadata
        return self._create_html(source_path, markdown_text, pandoc_executable)

    def _create_html(self, source_path, content, pandoc_executable):
        """Create HTML5 content."""
        # Get settings set in pelicanconf.py
        defaults_files = self.settings.get("PANDOC_DEFAULTS_FILES", [])

        # Adding support for the old defaults file setting
        # which will be removed in future versions of this plugin
        if self.settings.get("PANDOC_DEFAULT_FILES", []):
            defaults_files = self.settings.get("PANDOC_DEFAULT_FILES")

        arguments = self.settings.get("PANDOC_ARGS", [])
        extensions = self.settings.get("PANDOC_EXTENSIONS", [])

        if isinstance(extensions, list):
            extensions = "".join(extensions)

        # Check if source content has a YAML metadata block
        self._check_yaml_metadata_block(content)

        # Check validity of arguments or defaults files
        table_of_contents, citations = self._validate_fields(
            defaults_files, arguments, extensions
        )

        # Construct preliminary pandoc command
        pandoc_cmd = self._construct_pandoc_command(
            pandoc_executable, defaults_files, arguments, extensions
        )

        # Find and add bibliography if citations are specified
        if citations:
            for bib_file in self._find_bibs(source_path):
                pandoc_cmd.append("--bibliography={0}".format(bib_file))

        # Create HTML content using pandoc-reader-default.html template
        output = self._run_pandoc(pandoc_cmd, content)

        # Extract table of contents, text and metadata from HTML output
        output, toc, pandoc_metadata = self._extract_contents(output, table_of_contents)

        # Replace all occurrences of %7Bstatic%7D to {static},
        # %7Battach%7D to {attach} and %7Bfilename%7D to {filename}
        # so that static links are resolvable by pelican
        for encoded_str, raw_str in ENCODED_LINKS_TO_RAW_LINKS_MAP.items():
            output = output.replace(encoded_str, raw_str)

        # Parse Pandoc metadata and add it to Pelican
        metadata = self._process_metadata(pandoc_metadata)

        if table_of_contents:
            # Create table of contents and add to metadata
            metadata["toc"] = self.process_metadata("toc", toc)

        if self.settings.get("CALCULATE_READING_TIME", []):
            # Calculate reading time and add to metadata
            metadata["reading_time"] = self.process_metadata(
                "reading_time", self._calculate_reading_time(content)
            )

        return output, metadata

    def _validate_fields(self, defaults_files, arguments, extensions):
        """Validate fields and return citations and ToC request values."""
        # If defaults_files is empty then validate the argument and extensions
        if not defaults_files:
            # Validate the arguments to see that they are supported
            # by the plugin
            self._check_arguments(arguments)

            # Check if citations have been requested
            citations = self._check_if_citations(arguments, extensions)

            # Check if table of contents has been requested
            table_of_contents = self._check_if_toc(arguments)
        else:
            # Validate defaults files and get the citations
            # abd table of contents request value
            citations, table_of_contents = self._check_defaults(defaults_files)
        return table_of_contents, citations

    def _check_defaults(self, defaults_files):
        """Validate the Pandoc defaults files and report requested features.

        The files are merged into one YAML document before being checked.

        Args:
            defaults_files: Paths of the Pandoc defaults files.

        Returns:
            A ``(citations, table_of_contents)`` tuple of booleans.

        Raises:
            ValueError: If a key is defined more than once across the files,
                or if one of the validation helpers rejects the settings.
        """
        # Read each file in full and join with newlines so that a file
        # lacking a trailing newline cannot have its last line fused with the
        # first line of the next file
        file_contents = []
        for defaults_file in defaults_files:
            with open(defaults_file, "r", encoding="utf-8") as file_handle:
                file_contents.append(file_handle.read())
        defaults_data = "\n".join(file_contents)

        # Convert YAML data to a Python dictionary
        try:
            defaults = YAML().load(defaults_data)
        except constructor.DuplicateKeyError as duplicate_key_error:
            raise ValueError(
                "Duplicate keys defined in multiple defaults files."
            ) from duplicate_key_error

        # Empty files load as None; treat them as "no settings" so the
        # validators below can report what is missing
        if defaults is None:
            defaults = {}

        self._check_if_unsupported_settings(defaults)
        reader = self._check_input_format(defaults)
        self._check_output_format(defaults)

        # citeproc can be enabled with "citeproc: true" or listed in filters;
        # a "filters:" key with no value is treated as no filters
        filters = defaults.get("filters") or ""
        citeproc_specified = bool(defaults.get("citeproc", "")) or (
            "citeproc" in filters
        )

        # The extension +citations is enabled by default in Pandoc 2.11
        # we are checking that the extension is not disabled using -citations
        citations = citeproc_specified and "-citations" not in reader

        table_of_contents = bool(defaults.get("table-of-contents", ""))

        return citations, table_of_contents

    def _calculate_reading_time(self, content):
        """Calculate time taken to read content."""
        reading_speed = self.settings.get("READING_SPEED", DEFAULT_READING_SPEED)
        wordcount = count_words_in_markdown(content)

        time_unit = "minutes"
        try:
            reading_time = math.ceil(float(wordcount) / float(reading_speed))
            if reading_time == 1:
                time_unit = "minute"
            reading_time = "{} {}".format(str(reading_time), time_unit)
        except ValueError as words_per_minute_nan:
            raise ValueError(
                "READING_SPEED setting must be a number."
            ) from words_per_minute_nan

        return reading_time

    def _process_metadata(self, pandoc_metadata):
        """Process Pandoc metadata and add it to Pelican."""
        # Cycle through the metadata and process them
        metadata = {}
        for key, value in pandoc_metadata.items():
            key = key.lower()
            if value and isinstance(value, str):
                value = value.strip().strip('"')

            # Process the metadata
            metadata[key] = self.process_metadata(key, value)
        return metadata

    @staticmethod
    def _check_pandoc_version(pandoc_executable):
        """Ensure the given Pandoc executable reports version 2.11 or newer.

        Runs ``<pandoc_executable> --version`` and reads the version from the
        first line of its output, expected to look like ``pandoc <version>``.

        Args:
            pandoc_executable: Pandoc executable to query.

        Raises:
            Exception: If the reported version is older than 2.11.
            subprocess.CalledProcessError: If the command exits with a
                non-zero status.
        """
        completed = subprocess.run(
            [pandoc_executable, "--version"],
            capture_output=True,
            encoding="utf-8",
            check=True,
        )

        # The first output line has the form "pandoc <version>"
        first_line = completed.stdout.split("\n")[0]

        # Split the version into its dot-separated components
        version_parts = first_line.split()[1].split(".")
        major_version, minor_version = version_parts[0], version_parts[1]

        # Anything below major version 2 is unsupported, as is major
        # version 2 with a minor version below 11
        major_number = int(major_version)
        unsupported = major_number < PANDOC_SUPPORTED_MAJOR_VERSION or (
            major_number == PANDOC_SUPPORTED_MAJOR_VERSION
            and int(minor_version) < PANDOC_SUPPORTED_MINOR_VERSION
        )
        if unsupported:
            raise Exception("Pandoc version must be 2.11 or higher.")

    @staticmethod
    def _check_yaml_metadata_block(content):
        """Check if the source content has a YAML metadata block."""
        # Check that the given content is not empty
        if not content:
            raise Exception("Could not find metadata. File is empty.")

        # Split content into a list of lines
        content_lines = content.splitlines()

        # Check that the first line of the file starts with a YAML block
        if content_lines[0].rstrip() not in ["---"]:
            raise Exception("Could not find metadata header '---'.")

        # Find the end of the YAML block
        yaml_block_end = ""
        for line_num, line in enumerate(content_lines[1:]):
            if line.rstrip() in ["---", "..."]:
                yaml_block_end = line_num
                break

        # Check if the end of the YAML block was found
        if not yaml_block_end:
            raise Exception("Could not find end of metadata block.")

    @staticmethod
    def _construct_pandoc_command(
        pandoc_executable, defaults_files, arguments, extensions
    ):
        """Build the Pandoc command line as a list of arguments.

        The command always requests standalone output rendered with the
        plugin's own template. When defaults files are given they supply the
        rest of the configuration; otherwise the input format (Markdown plus
        ``extensions``), the HTML5 output format and ``arguments`` are added.

        Returns:
            The command as a list suitable for ``subprocess.run``.
        """
        template_path = os.path.join(TEMPLATES_PATH, PANDOC_READER_HTML_TEMPLATE)
        command = [
            pandoc_executable,
            "--standalone",
            "--template={}".format(template_path),
        ]

        if defaults_files:
            command.extend(
                "--defaults={0}".format(defaults_file)
                for defaults_file in defaults_files
            )
            return command

        input_format = "markdown" + extensions
        command.extend(["--from", input_format, "--to", "html5"])
        command.extend(arguments)
        return command

    @staticmethod
    def _run_pandoc(pandoc_cmd, content):
        """Execute the given pandoc command and return output."""
        output = subprocess.run(
            pandoc_cmd,
            input=content,
            capture_output=True,
            encoding="utf-8",
            check=True,
        )
        return output.stdout

    @staticmethod
    def _extract_contents(html_output, table_of_contents):
        """Split Pandoc output into body HTML, table of contents and metadata.

        The first line of ``html_output`` must be a JSON object holding the
        metadata; everything after it is parsed as HTML.

        Args:
            html_output: Raw text produced by Pandoc.
            table_of_contents: Whether a table of contents was requested.

        Returns:
            A ``(html, toc, metadata)`` tuple. ``toc`` is always a string: the
            ``<nav>`` markup when a table of contents was requested and
            found, otherwise an empty string.
        """
        # Extract pandoc metadata from html output
        pandoc_json_metadata, _, html_output = html_output.partition("\n")

        # Convert JSON string to dict
        pandoc_metadata = json.loads(pandoc_json_metadata)

        # Parse HTML output
        soup = bs4.BeautifulSoup(html_output, "html.parser")

        # Extract the table of contents if one was requested
        toc = ""
        if table_of_contents:
            # Look the element up once; when it is missing, toc stays an
            # empty string instead of turning into None
            toc_element = soup.body.find("nav", id="TOC")

            if toc_element is not None:
                # Serialise it and replace id="TOC" with class="toc"
                toc = str(toc_element).replace('id="TOC"', 'class="toc"')

                # Remove the table of contents from the HTML output
                toc_element.decompose()

        # Remove body tag around html output
        soup.body.unwrap()

        # Strip leading and trailing spaces
        html_output = str(soup).strip()

        return html_output, toc, pandoc_metadata

    @staticmethod
    def _check_if_citations(arguments, extensions):
        """Check if citations are specified."""
        citations = False
        if arguments and extensions:
            # The +citations extension is enabled by default in Pandoc 2.11
            # therefore we do a check to see that it is not disabled in extensions
            if (
                "--citeproc" in arguments or "-C" in arguments
            ) and "-citations" not in extensions:
                citations = True
        return citations

    @staticmethod
    def _check_if_toc(arguments):
        """Check if a table of contents should be generated."""
        table_of_contents = False
        if arguments:
            if "--toc" in arguments or "--table-of-contents" in arguments:
                table_of_contents = True
        return table_of_contents

    @staticmethod
    def _find_bibs(source_path):
        """Find bibliographies recursively in the sourcepath given."""
        bib_files = []
        filename = os.path.splitext(os.path.basename(source_path))[0]
        directory_path = os.path.dirname(os.path.abspath(source_path))
        for root, _, files in os.walk(directory_path):
            for extension in VALID_BIB_EXTENSIONS:
                bib_name = ".".join([filename, extension])
                if bib_name in files:
                    bib_files.append(os.path.join(root, bib_name))
        return bib_files

    @staticmethod
    def _check_arguments(arguments):
        """Check to see that only supported arguments have been passed."""
        for arg in arguments:
            if arg in UNSUPPORTED_ARGUMENTS:
                raise ValueError("Argument {0} is not supported.".format(arg))

    @staticmethod
    def _check_if_unsupported_settings(defaults):
        """Reject defaults that enable an unsupported option.

        Each entry of ``UNSUPPORTED_ARGUMENTS`` is looked up in ``defaults``
        under its name without the leading ``--``.

        Args:
            defaults: Mapping loaded from the Pandoc defaults files.

        Raises:
            ValueError: If one of those settings has a truthy value.
        """
        prefix_length = len("--")
        for unsupported_flag in UNSUPPORTED_ARGUMENTS:
            setting_name = unsupported_flag[prefix_length:]
            if not defaults.get(setting_name, ""):
                continue
            raise ValueError(
                "The default {} should be set to false.".format(setting_name)
            )

    @staticmethod
    def _check_input_format(defaults):
        """Check if the input format given is a Markdown variant."""
        reader = ""
        reader_input = defaults.get("reader", "")
        from_input = defaults.get("from", "")

        # Case where no input format is specified
        if not reader_input and not from_input:
            raise ValueError("No input format specified.")

        # Case where both reader and from are specified which is not supported
        if reader_input and from_input:
            raise ValueError(
                (
                    "Specifying both from and reader is not supported."
                    " Please specify just one."
                )
            )

        if reader_input or from_input:
            if reader_input:
                reader = reader_input
            elif from_input:
                reader = from_input

            reader_prefix = reader.replace("+", "-").split("-")[0]

            # Check to see if the reader_prefix matches a valid input format
            if reader_prefix not in VALID_INPUT_FORMATS:
                raise ValueError("Input type has to be a Markdown variant.")
        return reader

    @staticmethod
    def _check_output_format(defaults):
        """Check if the output format is HTML or HTML5."""
        writer_output = defaults.get("writer", "")
        to_output = defaults.get("to", "")

        # Case where both writer and to are specified which is not supported
        if writer_output and to_output:
            raise ValueError(
                (
                    "Specifying both to and writer is not supported."
                    " Please specify just one."
                )
            )

        # Case where neither writer nor to value is set to html
        if (
            writer_output not in VALID_OUTPUT_FORMATS
            and to_output not in VALID_OUTPUT_FORMATS
        ):
            output_formats = " or ".join(VALID_OUTPUT_FORMATS)
            raise ValueError(
                "Output format type must be either {}.".format(output_formats)
            )


def add_reader(readers):
    """Map every Pandoc Markdown extension to ``PandocReader``.

    Args:
        readers: Object whose ``reader_classes`` mapping associates file
            extensions with reader classes.
    """
    reader_classes = readers.reader_classes
    for extension in PandocReader.file_extensions:
        reader_classes[extension] = PandocReader


def register():
    """Register the PandocReader.

    Connects ``add_reader`` to the ``readers_init`` signal so the reader is
    added whenever that signal is sent.
    """
    signals.readers_init.connect(add_reader)
Обновление 2023-02-20 16:54:46 +03:00			`"""Reader that processes Pandoc Markdown and returns HTML5."""`
			`import json`
			`import math`
			`import os`
			`import shutil`
Fixes and enhancements including: * Add PANDOC_EXTENSIONS configuration variable, allowing one to enable or disable Pandoc's markdown extensions individually. * Remove dependency on pypandoc. * Don't change the working directory. * More efficient metadata extraction. 2014-11-26 13:42:37 -05:00			`import subprocess`
Add support for parsing YAML metadata. 2015-05-16 14:13:47 -04:00
Обновление 2023-02-20 16:54:46 +03:00			`import bs4`
			`from mwc.counter import count_words_in_markdown`
			`from ruamel.yaml import YAML, constructor`

added actual code 2014-03-26 11:35:27 +01:00			`from pelican import signals`
			`from pelican.readers import BaseReader`
use pelican_open in order to read the file 2014-03-30 16:02:53 +02:00			`from pelican.utils import pelican_open`
prettify code 2014-08-27 09:41:47 +02:00
Обновление 2023-02-20 16:54:46 +03:00			`DEFAULT_READING_SPEED = 200 # Words per minute`
			`DEFAULT_PANDOC_EXECUTABLE = "pandoc"`
			`DIR_PATH = os.path.dirname(__file__)`
			`ENCODED_LINKS_TO_RAW_LINKS_MAP = {`
			`"%7Bstatic%7D": "{static}",`
			`"%7Battach%7D": "{attach}",`
			`"%7Bfilename%7D": "{filename}",`
			`}`
			`FILE_EXTENSIONS = ["md", "mkd", "mkdn", "mdwn", "mdown", "markdown", "Rmd"]`
			`PANDOC_READER_HTML_TEMPLATE = "pandoc-reader-default.html"`
			`PANDOC_SUPPORTED_MAJOR_VERSION = 2`
			`PANDOC_SUPPORTED_MINOR_VERSION = 11`

			`TEMPLATES_PATH = os.path.abspath(os.path.join(DIR_PATH, "templates"))`
			`UNSUPPORTED_ARGUMENTS = ("--standalone", "--self-contained")`
			`VALID_BIB_EXTENSIONS = ["json", "yaml", "bibtex", "bib"]`

			`# Markdown variants supported in defaults files`
			`# Update as Pandoc adds or removes support for formats`
			`VALID_INPUT_FORMATS = (`
			`"commonmark",`
			`"commonmark_x",`
			`"gfm",`
			`"markdown",`
			`"markdown_mmd",`
			`"markdown_phpextra",`
			`"markdown_strict",`
			`)`
			`VALID_OUTPUT_FORMATS = ("html", "html5")`
Add support for parsing YAML metadata. 2015-05-16 14:13:47 -04:00

prettify code 2014-08-27 09:41:47 +02:00			`class PandocReader(BaseReader):`
Обновление 2023-02-20 16:54:46 +03:00			`"""Convert files written in Pandoc Markdown to HTML 5."""`

added actual code 2014-03-26 11:35:27 +01:00			`enabled = True`
Обновление 2023-02-20 16:54:46 +03:00			`file_extensions = FILE_EXTENSIONS`

			`def read(self, source_path):`
			`"""Parse Pandoc Markdown and return HTML5 markup and metadata."""`
			`# Get the user-defined path to the Pandoc executable or fall back to default`
			`pandoc_executable = self.settings.get(`
			`"PANDOC_EXECUTABLE_PATH", DEFAULT_PANDOC_EXECUTABLE`
			`)`

			`# If user-defined path, expand and make it absolute in case the path is relative`
			`if pandoc_executable != DEFAULT_PANDOC_EXECUTABLE:`
			`pandoc_executable = os.path.abspath(os.path.expanduser(pandoc_executable))`

			`# Check if pandoc is installed and is executable`
			`if not shutil.which(pandoc_executable):`
			`raise Exception("Could not find Pandoc. Please install.")`

			`# Check if the version of pandoc installed is 2.11 or higher`
			`self._check_pandoc_version(pandoc_executable)`

			`# Open Markdown file and read content`
			`content = ""`
			`with pelican_open(source_path) as file_content:`
			`content = file_content`

			`# Retrieve HTML content and metadata`
			`output, metadata = self._create_html(source_path, content, pandoc_executable)`

			`return output, metadata`

			`def _create_html(self, source_path, content, pandoc_executable):`
			`"""Create HTML5 content."""`
			`# Get settings set in pelicanconf.py`
			`defaults_files = self.settings.get("PANDOC_DEFAULTS_FILES", [])`

			`# Adding support for the old defaults file setting`
			`# which will be removed in future versions of this plugin`
			`if self.settings.get("PANDOC_DEFAULT_FILES", []):`
			`defaults_files = self.settings.get("PANDOC_DEFAULT_FILES")`

			`arguments = self.settings.get("PANDOC_ARGS", [])`
			`extensions = self.settings.get("PANDOC_EXTENSIONS", [])`

			`if isinstance(extensions, list):`
			`extensions = "".join(extensions)`

			`# Check if source content has a YAML metadata block`
			`self._check_yaml_metadata_block(content)`

			`# Check validity of arguments or defaults files`
			`table_of_contents, citations = self._validate_fields(`
			`defaults_files, arguments, extensions`
			`)`

			`# Construct preliminary pandoc command`
			`pandoc_cmd = self._construct_pandoc_command(`
			`pandoc_executable, defaults_files, arguments, extensions`
			`)`

			`# Find and add bibliography if citations are specified`
			`if citations:`
			`for bib_file in self._find_bibs(source_path):`
			`pandoc_cmd.append("--bibliography={0}".format(bib_file))`

			`# Create HTML content using pandoc-reader-default.html template`
			`output = self._run_pandoc(pandoc_cmd, content)`

			`# Extract table of contents, text and metadata from HTML output`
			`output, toc, pandoc_metadata = self._extract_contents(output, table_of_contents)`

			`# Replace all occurrences of %7Bstatic%7D to {static},`
			`# %7Battach%7D to {attach} and %7Bfilename%7D to {filename}`
			`# so that static links are resolvable by pelican`
			`for encoded_str, raw_str in ENCODED_LINKS_TO_RAW_LINKS_MAP.items():`
			`output = output.replace(encoded_str, raw_str)`

			`# Parse Pandoc metadata and add it to Pelican`
			`metadata = self._process_metadata(pandoc_metadata)`

			`if table_of_contents:`
			`# Create table of contents and add to metadata`
			`metadata["toc"] = self.process_metadata("toc", toc)`

			`if self.settings.get("CALCULATE_READING_TIME", []):`
			`# Calculate reading time and add to metadata`
			`metadata["reading_time"] = self.process_metadata(`
			`"reading_time", self._calculate_reading_time(content)`
			`)`

			`return output, metadata`

			`def _validate_fields(self, defaults_files, arguments, extensions):`
			`"""Validate fields and return citations and ToC request values."""`
			`# If defaults_files is empty then validate the argument and extensions`
			`if not defaults_files:`
			`# Validate the arguments to see that they are supported`
			`# by the plugin`
			`self._check_arguments(arguments)`

			`# Check if citations have been requested`
			`citations = self._check_if_citations(arguments, extensions)`

			`# Check if table of contents has been requested`
			`table_of_contents = self._check_if_toc(arguments)`
			`else:`
			`# Validate defaults files and get the citations`
			`# abd table of contents request value`
			`citations, table_of_contents = self._check_defaults(defaults_files)`
			`return table_of_contents, citations`

			`def _check_defaults(self, defaults_files):`
			`"""Check if the given Pandoc defaults file has valid values."""`
			`citations = False`
			`table_of_contents = False`

			`# Get the data in all defaults files as a string`
			`defaults_data = ""`
			`for defaults_file in defaults_files:`
			`with open(defaults_file, "r") as file_handle:`
			`for line in file_handle.readlines():`
			`defaults_data += line`

			`# Convert YAML data to a Python dictionary`
			`defaults = {}`
			`try:`
			`yaml = YAML()`
			`defaults = yaml.load(defaults_data)`
			`except constructor.DuplicateKeyError as duplicate_key_error:`
			`raise ValueError(`
			`"Duplicate keys defined in multiple defaults files."`
			`) from duplicate_key_error`

			`self._check_if_unsupported_settings(defaults)`
			`reader = self._check_input_format(defaults)`
			`self._check_output_format(defaults)`

			`if not citations:`
			`citeproc_specified = False`
added actual code 2014-03-26 11:35:27 +01:00
Обновление 2023-02-20 16:54:46 +03:00			`# Cases where citeproc is specified as citeproc: true`
			`if defaults.get("citeproc", ""):`
			`citeproc_specified = True`

			`# Cases where citeproc is specified in filters`
			`elif "citeproc" in defaults.get("filters", ""):`
			`citeproc_specified = True`

			`# The extension +citations is enabled by default in Pandoc 2.11`
			`# we are checking that the extension is not disabled using -citations`
			`if citeproc_specified and "-citations" not in reader:`
			`citations = True`

			`if not table_of_contents:`
			`if defaults.get("table-of-contents", ""):`
			`table_of_contents = True`

			`return citations, table_of_contents`

			`def _calculate_reading_time(self, content):`
			`"""Calculate time taken to read content."""`
			`reading_speed = self.settings.get("READING_SPEED", DEFAULT_READING_SPEED)`
			`wordcount = count_words_in_markdown(content)`

			`time_unit = "minutes"`
			`try:`
			`reading_time = math.ceil(float(wordcount) / float(reading_speed))`
			`if reading_time == 1:`
			`time_unit = "minute"`
			`reading_time = "{} {}".format(str(reading_time), time_unit)`
			`except ValueError as words_per_minute_nan:`
			`raise ValueError(`
			`"READING_SPEED setting must be a number."`
			`) from words_per_minute_nan`

			`return reading_time`

			`def _process_metadata(self, pandoc_metadata):`
			`"""Process Pandoc metadata and add it to Pelican."""`
			`# Cycle through the metadata and process them`
Add support for parsing YAML metadata. 2015-05-16 14:13:47 -04:00			`metadata = {}`
Обновление 2023-02-20 16:54:46 +03:00			`for key, value in pandoc_metadata.items():`
			`key = key.lower()`
			`if value and isinstance(value, str):`
			`value = value.strip().strip('"')`

			`# Process the metadata`
			`metadata[key] = self.process_metadata(key, value)`
			`return metadata`

			`@staticmethod`
			`def _check_pandoc_version(pandoc_executable):`
			`"""Check that the specified version of Pandoc is 2.11 or higher."""`
			`output = subprocess.run(`
			`[pandoc_executable, "--version"],`
			`capture_output=True,`
			`encoding="utf-8",`
			`check=True,`
			`)`
Add support for parsing YAML metadata. 2015-05-16 14:13:47 -04:00
Обновление 2023-02-20 16:54:46 +03:00			`# Returns a string of the form pandoc <version>`
			`pandoc_version = output.stdout.split("\n")[0]`
Add support for parsing YAML metadata. 2015-05-16 14:13:47 -04:00
Обновление 2023-02-20 16:54:46 +03:00			`# Get the major and minor version from the above version string`
			`major_version = pandoc_version.split()[1].split(".")[0]`
			`minor_version = pandoc_version.split()[1].split(".")[1]`
Add support for parsing YAML metadata. 2015-05-16 14:13:47 -04:00
Обновление 2023-02-20 16:54:46 +03:00			`# Pandoc major version less than 2 are not supported`
			`if int(major_version) < PANDOC_SUPPORTED_MAJOR_VERSION:`
			`raise Exception("Pandoc version must be 2.11 or higher.")`
Add support for parsing YAML metadata. 2015-05-16 14:13:47 -04:00
Обновление 2023-02-20 16:54:46 +03:00			`# Pandoc major version 2 minor version less than 11 are not supported`
			`if (`
			`int(major_version) == PANDOC_SUPPORTED_MAJOR_VERSION`
			`and int(minor_version) < PANDOC_SUPPORTED_MINOR_VERSION`
			`):`
			`raise Exception("Pandoc version must be 2.11 or higher.")`
Add support for parsing YAML metadata. 2015-05-16 14:13:47 -04:00
Обновление 2023-02-20 16:54:46 +03:00			`@staticmethod`
			`def _check_yaml_metadata_block(content):`
			`"""Check if the source content has a YAML metadata block."""`
			`# Check that the given content is not empty`
			`if not content:`
			`raise Exception("Could not find metadata. File is empty.")`
Allow empty summary 2019-04-25 14:52:57 +03:00
Обновление 2023-02-20 16:54:46 +03:00			`# Split content into a list of lines`
			`content_lines = content.splitlines()`
Usable, albeit slightly hackish, solution for YAML metadata. 2016-08-04 22:52:50 -04:00
Обновление 2023-02-20 16:54:46 +03:00			`# Check that the first line of the file starts with a YAML block`
			`if content_lines[0].rstrip() not in ["---"]:`
			`raise Exception("Could not find metadata header '---'.")`

			`# Find the end of the YAML block`
			`yaml_block_end = ""`
			`for line_num, line in enumerate(content_lines[1:]):`
			`if line.rstrip() in ["---", "..."]:`
			`yaml_block_end = line_num`
			`break`

			`# Check if the end of the YAML block was found`
			`if not yaml_block_end:`
			`raise Exception("Could not find end of metadata block.")`

			`@staticmethod`
			`def _construct_pandoc_command(`
			`pandoc_executable, defaults_files, arguments, extensions`
			`):`
			`"""Construct Pandoc command for content."""`
			`pandoc_cmd = [`
			`pandoc_executable,`
			`"--standalone",`
			`"--template={}".format(`
			`os.path.join(TEMPLATES_PATH, PANDOC_READER_HTML_TEMPLATE)`
			`),`
			`]`
			`if not defaults_files:`
			`pandoc_cmd.extend(["--from", "markdown" + extensions, "--to", "html5"])`
			`pandoc_cmd.extend(arguments)`
Add support for parsing YAML metadata. 2015-05-16 14:13:47 -04:00			`else:`
Обновление 2023-02-20 16:54:46 +03:00			`for defaults_file in defaults_files:`
			`pandoc_cmd.append("--defaults={0}".format(defaults_file))`
			`return pandoc_cmd`

			`@staticmethod`
			`def _run_pandoc(pandoc_cmd, content):`
			`"""Execute the given pandoc command and return output."""`
			`output = subprocess.run(`
attempt to enable bibligraphy but founder in relative file paths 2019-02-23 16:14:24 +11:00			`pandoc_cmd,`
Обновление 2023-02-20 16:54:46 +03:00			`input=content,`
			`capture_output=True,`
			`encoding="utf-8",`
			`check=True,`
			`)`
			`return output.stdout`
use brandonwillard's less crazy link preservation' 2018-07-13 21:32:30 -05:00
Обновление 2023-02-20 16:54:46 +03:00			`@staticmethod`
			`def _extract_contents(html_output, table_of_contents):`
			`"""Extract body html, table of contents and metadata from output."""`
			`# Extract pandoc metadata from html output`
			`pandoc_json_metadata, _, html_output = html_output.partition("\n")`

			`# Convert JSON string to dict`
			`pandoc_metadata = json.loads(pandoc_json_metadata)`

			`# Parse HTML output`
			`soup = bs4.BeautifulSoup(html_output, "html.parser")`

			`# Extract the table of contents if one was requested`
			`toc = ""`
			`if table_of_contents:`
			`# Find the table of contents`
			`toc = soup.body.find("nav", id="TOC")`

			`if toc:`
			`# Convert it to a string`
			`toc = str(toc)`

			`# Replace id=TOC with class="toc"`
			`toc = toc.replace('id="TOC"', 'class="toc"')`

			`# Remove the table of contents from the HTML output`
			`soup.body.find("nav", id="TOC").decompose()`

			`# Remove body tag around html output`
			`soup.body.unwrap()`

			`# Strip leading and trailing spaces`
			`html_output = str(soup).strip()`

			`return html_output, toc, pandoc_metadata`

			`@staticmethod`
			`def _check_if_citations(arguments, extensions):`
			`"""Check if citations are specified."""`
			`citations = False`
			`if arguments and extensions:`
			`# The +citations extension is enabled by default in Pandoc 2.11`
			`# therefore we do a check to see that it is not disabled in extensions`
			`if (`
			`"--citeproc" in arguments or "-C" in arguments`
			`) and "-citations" not in extensions:`
			`citations = True`
			`return citations`

			`@staticmethod`
			`def _check_if_toc(arguments):`
			`"""Check if a table of contents should be generated."""`
			`table_of_contents = False`
			`if arguments:`
			`if "--toc" in arguments or "--table-of-contents" in arguments:`
			`table_of_contents = True`
			`return table_of_contents`

			`@staticmethod`
			`def _find_bibs(source_path):`
			`"""Find bibliographies recursively in the sourcepath given."""`
			`bib_files = []`
			`filename = os.path.splitext(os.path.basename(source_path))[0]`
			`directory_path = os.path.dirname(os.path.abspath(source_path))`
			`for root, _, files in os.walk(directory_path):`
			`for extension in VALID_BIB_EXTENSIONS:`
			`bib_name = ".".join([filename, extension])`
			`if bib_name in files:`
			`bib_files.append(os.path.join(root, bib_name))`
			`return bib_files`

			`@staticmethod`
			`def _check_arguments(arguments):`
			`"""Check to see that only supported arguments have been passed."""`
			`for arg in arguments:`
			`if arg in UNSUPPORTED_ARGUMENTS:`
			`raise ValueError("Argument {0} is not supported.".format(arg))`

			`@staticmethod`
			`def _check_if_unsupported_settings(defaults):`
			`"""Check if unsupported settings are specified in the defaults."""`
			`for arg in UNSUPPORTED_ARGUMENTS:`
			`arg = arg[2:]`
			`if defaults.get(arg, ""):`
			`raise ValueError("The default {} should be set to false.".format(arg))`

			`@staticmethod`
			`def _check_input_format(defaults):`
			`"""Check if the input format given is a Markdown variant."""`
			`reader = ""`
			`reader_input = defaults.get("reader", "")`
			`from_input = defaults.get("from", "")`

			`# Case where no input format is specified`
			`if not reader_input and not from_input:`
			`raise ValueError("No input format specified.")`

			`# Case where both reader and from are specified which is not supported`
			`if reader_input and from_input:`
			`raise ValueError(`
			`(`
			`"Specifying both from and reader is not supported."`
			`" Please specify just one."`
			`)`
			`)`

			`if reader_input or from_input:`
			`if reader_input:`
			`reader = reader_input`
			`elif from_input:`
			`reader = from_input`

			`reader_prefix = reader.replace("+", "-").split("-")[0]`

			`# Check to see if the reader_prefix matches a valid input format`
			`if reader_prefix not in VALID_INPUT_FORMATS:`
			`raise ValueError("Input type has to be a Markdown variant.")`
			`return reader`

			`@staticmethod`
			`def _check_output_format(defaults):`
			`"""Check if the output format is HTML or HTML5."""`
			`writer_output = defaults.get("writer", "")`
			`to_output = defaults.get("to", "")`

			`# Case where both writer and to are specified which is not supported`
			`if writer_output and to_output:`
			`raise ValueError(`
			`(`
			`"Specifying both to and writer is not supported."`
			`" Please specify just one."`
			`)`
			`)`

			`# Case where neither writer nor to value is set to html`
			`if (`
			`writer_output not in VALID_OUTPUT_FORMATS`
			`and to_output not in VALID_OUTPUT_FORMATS`
			`):`
			`output_formats = " or ".join(VALID_OUTPUT_FORMATS)`
			`raise ValueError(`
			`"Output format type must be either {}.".format(output_formats)`
			`)`
prettify code 2014-08-27 09:41:47 +02:00
Add support for parsing YAML metadata. 2015-05-16 14:13:47 -04:00
def add_reader(readers):
    """Map every extension handled by PandocReader to the PandocReader class."""
    reader_classes = readers.reader_classes
    for extension in PandocReader.file_extensions:
        reader_classes[extension] = PandocReader
prettify code 2014-08-27 09:41:47 +02:00
Add support for parsing YAML metadata. 2015-05-16 14:13:47 -04:00
def register():
    """Connect ``add_reader`` to Pelican's ``readers_init`` signal."""
    readers_init = signals.readers_init
    readers_init.connect(add_reader)