From 37be0dc8175719b253a1401b10a3ea666a6fc060 Mon Sep 17 00:00:00 2001 From: Jannis Mainczyk Date: Tue, 12 Sep 2023 13:20:24 +0200 Subject: [PATCH] feat: improve code language detection use `pygments` if available fallback to detection based on file extension map between extension and code language (e.g. yml -> yaml, j2 -> jinja) ops(release): bump to v0.0.5 --- CHANGELOG.md | 12 +++++++++++- includex.py | 40 +++++++++++++++++++++++++++++++++++++--- pyproject.toml | 3 +++ 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e213f49..b6a9268 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased -[*see all changes*](https://github.com/jannismain/mkdocs-macros-includex/compare/v0.0.4...HEAD) +[*see all changes*](https://github.com/jannismain/mkdocs-macros-includex/compare/v0.0.5...HEAD) + +## [0.0.5] - 2023-09-12 +[0.0.5]: https://github.com/jannismain/mkdocs-macros-includex/releases/tag/v0.0.5 + +### Changed + +- **code**: Infer code language using `pygments`, if available (added as optional dependency) + - if `pygments` is not available, map some file extensions to pygments language (e.g. `yml` -> `yaml`) + +[*see all changes*](https://github.com/jannismain/mkdocs-macros-includex/compare/v0.0.4...v0.0.5) ## [0.0.4] - 2023-09-12 [0.0.4]: https://github.com/jannismain/mkdocs-macros-includex/releases/tag/v0.0.4 diff --git a/includex.py b/includex.py index 1b151de..99f369e 100644 --- a/includex.py +++ b/includex.py @@ -1,7 +1,14 @@ import pathlib from warnings import warn -__version__ = "0.0.4" +try: + import pygments + + use_pygments = True +except ImportError: + use_pygments = False + +__version__ = "0.0.5" def define_env(env): # pragma: no cover @@ -18,6 +25,12 @@ def define_env(env): # pragma: no cover ERROR_NOTICE_TEMPLATE = '%s' CAPTION_TEMPLATE = "*%(filepath)s%(line)s*{.caption}" +CODE_EXTENSION_TO_LANGUAGE = {"yml": "yaml", "j2": "jinja"} +"""Map of file extensions to code language. + +Used when pygments is not available. +""" + def includex( filepath: pathlib.Path, @@ -211,8 +224,11 @@ def includex( stacklevel=2, ) - if code is True and lang is None and filepath.suffix: - lang = filepath.suffixes[-1][1:] # drop leading dot + if code is True and lang is None: + if use_pygments: + lang = _infer_lang_using_pygments(filepath, "".join(content)) + if not use_pygments or lang is None: # fallback in case pygments failed to guess lang + lang = _infer_lang_file_extension(filepath) elif isinstance(code, str): lang = code @@ -291,6 +307,24 @@ def includex( ) +def _infer_lang_using_pygments(filepath, text): + """Infer language using pygments based on filename or content.""" + try: + lexer = pygments.lexers.guess_lexer_for_filename(filepath.name, text) + except pygments.util.ClassNotFound: + try: + lexer = pygments.lexers.guess_lexer(text) + except pygments.util.ClassNotFound: + lexer = None + if lexer is not None: + return lexer.name + + +def _infer_lang_file_extension(filepath): + file_extension = filepath.suffixes[-1][1:] + return CODE_EXTENSION_TO_LANGUAGE.get(file_extension, file_extension) + + def _render_caption(caption, filepath: pathlib.Path, start=0, end=0): if end is None: # open end inclusion end_line_str = "-" diff --git a/pyproject.toml b/pyproject.toml index 0f5bdd5..3069eac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,9 @@ license = "MIT" authors = [{ name = "Jannis Mainczyk", email = "jmainczyk@gmail.com" }] requires-python = ">=3.10" +[project.optional-dependencies] +pygments = ["pygments"] + [tool.hatch.version] path = "includex.py"