gpoore / minted

minted is a LaTeX package that provides syntax highlighting using the Pygments library. Highlighted source code can be customized using fancyvrb.
1.76k stars 128 forks source link

using callback in prolog lexer gives errors #309

Closed pedrozudo closed 1 year ago

pedrozudo commented 3 years ago

Hi,

So, I was playing around with the Prolog lexer of Pygments and tried to extend it with callbacks in order to highlight special tokens differently. The weird thing is that it does not work within a custom minted environment, but when I use Pygments on its own, without minted, it does give me the expected output.

Here are the details:

I have the following lexer file, called prolog_special_lexer.py. It is more or less copy-pasted from the Pygments Prolog lexer; the only difference is that I have introduced callbacks to catch special tokens. The names of the callback functions are special_atom_callback and special_function_callback.

import re

from pygments.lexer import RegexLexer, bygroups
from pygments.token import (
    Text,
    Comment,
    Operator,
    Keyword,
    Name,
    String,
    Number,
    Punctuation,
)

__all__ = ["SpecialPrologLexer"]

def special_atom_callback(lexer, match):
    """Yield a single token for a matched atom.

    The whole match is emitted as ``Name.Builtin`` when its text is one of
    ``lexer.special_words`` and as ``String.Atom`` otherwise. The token is
    positioned at the start of the match.
    """
    atom = match.group(0)
    kind = Name.Builtin if atom in lexer.special_words else String.Atom
    yield match.start(0), kind, atom

def special_function_callback(lexer, match):
    """Tokenize a ``name <whitespace> delimiter`` match group by group.

    Group 1 (the name) becomes ``Name.Builtin`` when it is listed in
    ``lexer.special_words`` and ``Name.Function`` otherwise; group 2 (optional
    whitespace) becomes ``Text``.

    Group 3 depends on which rule fired: the clause operators ``:-`` and
    ``-->`` are tagged ``Operator``, anything else (the opening parenthesis of
    a call) is tagged ``Punctuation``, matching the ``bygroups`` actions this
    callback replaces.

    Returns the token generator produced by ``bygroups``.
    """
    if match.group(1) in lexer.special_words:
        name_token = Name.Builtin
    else:
        name_token = Name.Function

    # The same callback serves both the "function defn" rule and the
    # "name(" rule, so the delimiter's token type has to be decided here.
    if match.group(3) in (":-", "-->"):
        delimiter_token = Operator
    else:
        delimiter_token = Punctuation

    return bygroups(name_token, Text, delimiter_token)(lexer, match)

class SpecialPrologLexer(RegexLexer):
    """
    Lexer for Prolog files that highlights a fixed list of special words.

    The rule table mirrors a plain Prolog lexer, except that the rules for
    function heads, calls and atoms hand their match to
    ``special_function_callback`` / ``special_atom_callback``, which consult
    ``special_words`` to pick the token type.
    """

    # Words the callbacks report as Name.Builtin instead of their usual token.
    special_words = ["holidays", "vacation"]

    # Lexer metadata (name, aliases, file patterns, MIME types).
    name = "Prolog"
    aliases = ["prolog"]
    filenames = ["*.ecl", "*.prolog", "*.pro", "*.pl"]
    mimetypes = ["text/x-prolog"]

    flags = re.UNICODE | re.MULTILINE

    # State machine: each state is an ordered list of
    # (regex, action[, new state]) rules.
    tokens = {
        "root": [
            (r"/\*", Comment.Multiline, "nested-comment"),
            (r"%.*", Comment.Single),
            # character literal
            (r"0\'.", String.Char),
            (r"0b[01]+", Number.Bin),
            (r"0o[0-7]+", Number.Oct),
            (r"0x[0-9a-fA-F]+", Number.Hex),
            # literal with prepended base
            (r"\d\d?\'[a-zA-Z0-9]+", Number.Integer),
            (r"(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?", Number.Float),
            (r"\d+", Number.Integer),
            (r"[\[\](){}|.,;!]", Punctuation),
            (r":-|-->", Punctuation),
            (
                r'"(?:\\x[0-9a-fA-F]+\\|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|'
                r'\\[0-7]+\\|\\["\nabcefnrstv]|[^\\"])*"',
                String.Double,
            ),
            (r"'(?:''|[^'])*'", String.Atom),  # quoted atom
            # Needs to not be followed by an atom.
            # (r'=(?=\s|[a-zA-Z\[])', Operator),
            (r"is\b", Operator),
            (r"(<|>|=<|>=|==|=:=|=|/|//|\*|\+|-)(?=\s|[a-zA-Z0-9\[])", Operator),
            (r"(mod|div|not)\b", Operator),
            (r"_", Keyword),  # The don't-care variable
            # NOTE(review): this rule is listed before the function-definition
            # rule below, so a lowercase ASCII name written directly against
            # ":-" (e.g. "foo:- bar") appears to be consumed here as
            # Namespace + ":" and never reaches special_function_callback --
            # confirm against the intended highlighting.
            (r"([a-z]+)(:)", bygroups(Name.Namespace, Punctuation)),
            (
                r"([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]"
                r"[\w$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)"
                r"(\s*)(:-|-->)",
                # bygroups(Name.Function, Text, Operator),
                special_function_callback,
            ),  # function defn
            (
                r"([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]"
                r"[\w$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)"
                r"(\s*)(\()",
                # bygroups(Name.Function, Text, Punctuation),
                special_function_callback,
            ),
            (
                r"[a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]"
                r"[\w$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*",
                # String.Atom,
                special_atom_callback,
            ),  # atom, characters
            # This one includes !
            (
                r"[#&*+\-./:<=>?@\\^~\u00a1-\u00bf\u2010-\u303f]+",
                # String.Atom,
                special_atom_callback,
            ),  # atom, graphics
            (r"[A-Z_]\w*", Name.Variable),
            (r"\s+|[\u2000-\u200f\ufff0-\ufffe\uffef]", Text),
        ],
        # Block comments nest: "/*" pushes another level, "*/" pops one.
        "nested-comment": [
            (r"\*/", Comment.Multiline, "#pop"),
            (r"/\*", Comment.Multiline, "#push"),
            (r"[^*/]+", Comment.Multiline),
            (r"[*/]", Comment.Multiline),
        ],
    }

    def analyse_text(text):
        """Report whether ``text`` contains the clause operator ``:-``."""
        return ":-" in text

So this is my lexer, all good so far. I can test this with the following python script.

from pygments import highlight
from prolog_special_lexer import SpecialPrologLexer
from pygments.formatters import LatexFormatter

# Sample program: both clause heads are entries of the lexer's special words.
code = """
vacation :- time, money.
holidays :- time.
"""

formatter = LatexFormatter()
lexer = SpecialPrologLexer()

# Write the highlighted listing. The context manager closes the file even if
# highlighting raises, and the explicit encoding keeps the output independent
# of the platform default.
highlighted = highlight(code, lexer, formatter)
with open("code_output.tex", "w", encoding="utf-8") as code_file:
    code_file.write(highlighted)

# Write the LaTeX style definitions that the highlighted listing relies on.
with open("style_output.tex", "w", encoding="utf-8") as style_file:
    style_file.write(formatter.get_style_defs())

This Python script runs and outputs two files: one containing the TeX code that renders the Prolog code, and the other containing the TeX code for the style. I can include both in a .tex file and compile it.

\documentclass{article}
% General document formatting
\usepackage[margin=0.7in]{geometry}
\usepackage[parfill]{parskip}
\usepackage[utf8]{inputenc}

% Related to math
\usepackage{amsmath,amssymb,amsfonts,amsthm}

\usepackage{pygmentex}

\usepackage[cache=false]{minted}
\usepackage{fancyvrb}
\usepackage{color}

\makeatletter
% Reset all six formatting hooks (\PY@it, \PY@bf, \PY@ul, \PY@tc, \PY@bc,
% \PY@ff) to \relax so that styling from a previous token does not leak.
\def\PY@reset{\let\PY@it=\relax \let\PY@bf=\relax%
    \let\PY@ul=\relax \let\PY@tc=\relax%
    \let\PY@bc=\relax \let\PY@ff=\relax}
% Run the style macro \PY@tok@<name> for one token-class name.
\def\PY@tok#1{\csname PY@tok@#1\endcsname}
% Walk a "+"-separated list of token-class names, applying each one, and stop
% at the \relax sentinel that \PY appends.
\def\PY@toks#1+{\ifx\relax#1\empty\else%
    \PY@tok{#1}\expandafter\PY@toks\fi}
% Typeset #1 wrapped in the six hooks, in the nesting order shown.
\def\PY@do#1{\PY@bc{\PY@tc{\PY@ul{%
                \PY@it{\PY@bf{\PY@ff{#1}}}}}}}
% Entry point: \PY{<class>+<class>...}{<text>} resets the hooks, applies every
% listed token class, then typesets <text>.
\def\PY#1#2{\PY@reset\PY@toks#1+\relax+\PY@do{#2}}

\expandafter\def\csname PY@tok@w\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.73,0.73}{##1}}}
\expandafter\def\csname PY@tok@c\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@cp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.74,0.48,0.00}{##1}}}
\expandafter\def\csname PY@tok@k\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kt\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.69,0.00,0.25}{##1}}}
\expandafter\def\csname PY@tok@o\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@ow\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\expandafter\def\csname PY@tok@nb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@nf\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PY@tok@nc\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PY@tok@nn\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PY@tok@ne\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.82,0.25,0.23}{##1}}}
\expandafter\def\csname PY@tok@nv\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@no\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.53,0.00,0.00}{##1}}}
\expandafter\def\csname PY@tok@nl\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.63,0.63,0.00}{##1}}}
\expandafter\def\csname PY@tok@ni\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.60,0.60,0.60}{##1}}}
\expandafter\def\csname PY@tok@na\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.49,0.56,0.16}{##1}}}
\expandafter\def\csname PY@tok@nt\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@nd\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\expandafter\def\csname PY@tok@s\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@sd\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@si\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}}
\expandafter\def\csname PY@tok@se\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.13}{##1}}}
\expandafter\def\csname PY@tok@sr\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}}
\expandafter\def\csname PY@tok@ss\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@sx\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@m\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@gh\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}}
\expandafter\def\csname PY@tok@gu\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.50,0.00,0.50}{##1}}}
\expandafter\def\csname PY@tok@gd\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.63,0.00,0.00}{##1}}}
\expandafter\def\csname PY@tok@gi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.63,0.00}{##1}}}
\expandafter\def\csname PY@tok@gr\endcsname{\def\PY@tc##1{\textcolor[rgb]{1.00,0.00,0.00}{##1}}}
\expandafter\def\csname PY@tok@ge\endcsname{\let\PY@it=\textit}
\expandafter\def\csname PY@tok@gs\endcsname{\let\PY@bf=\textbf}
\expandafter\def\csname PY@tok@gp\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}}
\expandafter\def\csname PY@tok@go\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.53,0.53,0.53}{##1}}}
\expandafter\def\csname PY@tok@gt\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.27,0.87}{##1}}}
\expandafter\def\csname PY@tok@err\endcsname{\def\PY@bc##1{\setlength{\fboxsep}{0pt}\fcolorbox[rgb]{1.00,0.00,0.00}{1,1,1}{\strut ##1}}}
\expandafter\def\csname PY@tok@kc\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kd\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kn\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kr\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@bp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@fm\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PY@tok@vc\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@vg\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@vi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@vm\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@sa\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@sb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@sc\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@dl\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@s2\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@sh\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@s1\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@mb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@mf\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@mh\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@mi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@il\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@mo\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@ch\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@cm\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@cpf\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@c1\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@cs\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}

\def\PYZbs{\char`\\}
\def\PYZus{\char`\_}
\def\PYZob{\char`\{}
\def\PYZcb{\char`\}}
\def\PYZca{\char`\^}
\def\PYZam{\char`\&}
\def\PYZlt{\char`\<}
\def\PYZgt{\char`\>}
\def\PYZsh{\char`\#}
\def\PYZpc{\char`\%}
\def\PYZdl{\char`\$}
\def\PYZhy{\char`\-}
\def\PYZsq{\char`\'}
\def\PYZdq{\char`\"}
\def\PYZti{\char`\~}
% for compatibility with earlier versions
\def\PYZat{@}
\def\PYZlb{[}
\def\PYZrb{]}
\makeatother

\title{Test ProbLog Lexer}
\date{}

\begin{document}

    \maketitle

\begin{Verbatim}[commandchars=\\\{\}]
    \PY{n+nb}{vacation} \PY{o}{:\PYZhy{}} \PY{l+s+sAtom}{time}\PY{p}{,} \PY{l+s+sAtom}{money}\PY{p}{.}
    \PY{n+nb}{holidays} \PY{o}{:\PYZhy{}} \PY{l+s+sAtom}{time}\PY{p}{.}
\end{Verbatim}

\end{document}

Most of the stuff in the preamble is the style and the prolog code is in the Verbatim environment in the body of the .tex file. Compiling the .tex file works just fine and the prolog code is highlighted as expected (holidays and vacation are the two special words that are highlighted as builtins). The pic below is a partial screenshot of the entire pdf that is produced by the .tex code.

image

So far so good. Now I want to have this within minted. I basically declare a new minted environment using the prolog lexer with the callbacks and should be done. Well not really, it does not work. Here is my .tex file using minted:

\documentclass{article}
% General document formatting
\usepackage[margin=0.7in]{geometry}
\usepackage[parfill]{parskip}
\usepackage[utf8]{inputenc}

% Related to math
\usepackage{amsmath,amssymb,amsfonts,amsthm}

\usepackage{txfonts}
\usepackage[finalizecache=false,frozencache=false,newfloat]{minted}
\usemintedstyle{default}
\setminted[prolog_special_lexer.py:SpecialPrologLexer -x]{}

% Define the environment "prolog_special" on top of the custom lexer file.
% NOTE: the underscore in the environment name is reported in this thread to
% break the environment (its catcode differs before and after \begin{minted});
% a name such as "prolog-special" avoids that.
\newminted[prolog_special]{prolog_special_lexer.py:SpecialPrologLexer -x}{}

\title{Test ProLog Lexer}
\date{}

\begin{document}
    \maketitle

% The name in \end{...} must match the one in \begin{...} exactly; otherwise
% the verbatim scanner never finds the end of the environment.
\begin{prolog_special}

    vacation:- time, money.
    holidays:- time.

\end{prolog_special}

\end{document}

Compiling this .tex file now does not work. It gives me the following error:

File ended while scanning use of \FancyVerbGetLine.

I guess this is the relevant part of the log:

\openout3 = `main.pyg'.

)
Runaway argument?
! File ended while scanning use of \FancyVerbGetLine.
<inserted text> 
                \par 
<*> main.tex

I suspect you have forgotten a `}', causing me
to read past where you wanted me to stop.
I'll try to recover; but if the error is serious,
you'd better type `E' or `X' now and fix your file.

! Emergency stop.
<*> main.tex

*** (job aborted, no legal \end found)

So yeah, anyone got an idea what's going on here?

Ah, just running the prolog lexer (the original one) with minted works fine. So it has got something to do with the callbacks.

muzimuzhi commented 3 years ago

The problem came from the _ character in env name prolog_special. Using prolog-special works.

With input

\begin{prolog_special}
    vacation:- time, money.
    holidays:- time.
\end{prolog_special}

In conclusion, without special treatment, only those characters having the same catcode before and after \begin{minted} can be used in <envname> of \newminted[<language>]{<envname>}{<options>}. For example, a-zA-Z always have catcode 11 (letter) and - always has catcode 12 (other).

pedrozudo commented 3 years ago

Ah great, thanks a lot. It runs now without errors! I was looking at the completely wrong place to fix the bug.

I have a follow up question. You might be able to answer. Let's assume I want to compute the special words in a just in time fashion. That is, I want to have the lexer analyse the entire text and get from within the text the special words. I tried something like the following:


class SpecialPrologLexer(RegexLexer):
    """
    Lexer for Prolog files whose special words are computed from the input.

    The rule table is the same as for a plain Prolog lexer, except that the
    rules for function heads, calls and atoms delegate to
    ``special_function_callback`` / ``special_atom_callback``. Those callbacks
    read ``special_words``, which ``get_tokens`` refreshes for every input
    before any token is produced.
    """

    # Class-level default; get_tokens() replaces it per instance.
    special_words = []

    name = "Prolog"
    aliases = ["prolog"]
    filenames = ["*.ecl", "*.prolog", "*.pro", "*.pl"]
    mimetypes = ["text/x-prolog"]

    flags = re.UNICODE | re.MULTILINE

    tokens = {
        "root": [
            (r"/\*", Comment.Multiline, "nested-comment"),
            (r"%.*", Comment.Single),
            # character literal
            (r"0\'.", String.Char),
            (r"0b[01]+", Number.Bin),
            (r"0o[0-7]+", Number.Oct),
            (r"0x[0-9a-fA-F]+", Number.Hex),
            # literal with prepended base
            (r"\d\d?\'[a-zA-Z0-9]+", Number.Integer),
            (r"(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?", Number.Float),
            (r"\d+", Number.Integer),
            (r"[\[\](){}|.,;!]", Punctuation),
            (r":-|-->", Punctuation),
            (
                r'"(?:\\x[0-9a-fA-F]+\\|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|'
                r'\\[0-7]+\\|\\["\nabcefnrstv]|[^\\"])*"',
                String.Double,
            ),
            (r"'(?:''|[^'])*'", String.Atom),  # quoted atom
            # Needs to not be followed by an atom.
            # (r'=(?=\s|[a-zA-Z\[])', Operator),
            (r"is\b", Operator),
            (r"(<|>|=<|>=|==|=:=|=|/|//|\*|\+|-)(?=\s|[a-zA-Z0-9\[])", Operator),
            (r"(mod|div|not)\b", Operator),
            (r"_", Keyword),  # The don't-care variable
            (r"([a-z]+)(:)", bygroups(Name.Namespace, Punctuation)),
            (
                r"([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]"
                r"[\w$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)"
                r"(\s*)(:-|-->)",
                # bygroups(Name.Function, Text, Operator),
                special_function_callback,
            ),  # function defn
            (
                r"([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]"
                r"[\w$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)"
                r"(\s*)(\()",
                # bygroups(Name.Function, Text, Punctuation),
                special_function_callback,
            ),
            (
                r"[a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]"
                r"[\w$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*",
                # String.Atom,
                special_atom_callback,
            ),  # atom, characters
            # This one includes !
            (
                r"[#&*+\-./:<=>?@\\^~\u00a1-\u00bf\u2010-\u303f]+",
                # String.Atom,
                special_atom_callback,
            ),  # atom, graphics
            (r"[A-Z_]\w*", Name.Variable),
            (r"\s+|[\u2000-\u200f\ufff0-\ufffe\uffef]", Text),
        ],
        "nested-comment": [
            (r"\*/", Comment.Multiline, "#pop"),
            (r"/\*", Comment.Multiline, "#push"),
            (r"[^*/]+", Comment.Multiline),
            (r"[*/]", Comment.Multiline),
        ],
    }

    def analyse_text(text):
        """Report whether ``text`` contains the clause operator ``:-``."""
        return ":-" in text

    def get_special_words(self, text):
        """Return the words in ``text`` that should be highlighted as builtins.

        ``text`` is the raw input exactly as it was handed to ``get_tokens``.
        The current implementation is a placeholder that ignores ``text``.
        """
        # do something smarter than the dummy code below
        return ["vacation", "holidays"]

    def get_tokens(self, text, unfiltered=False):
        """
        Return an iterable of (tokentype, value) pairs generated from `text`.

        The special words are computed from `text` first, so the callbacks
        see them while lexing. Decoding, newline normalisation, stripping,
        tab expansion and filtering are left to the base class implementation
        instead of being copied here, which avoids depending on helpers that
        are private to ``pygments.lexer``. If `unfiltered` is set to `True`,
        the filtering mechanism is bypassed even if filters are defined.
        """
        self.special_words = self.get_special_words(text)
        return super().get_tokens(text, unfiltered)

What is happening is that the special_words are now not given initially. Only after running get_tokens does the lexer know what the special words are. The weird thing is that this works fine when using Pygments only, but no longer works when using minted. By "not work" I mean that with minted the tokens in the special_words list are not highlighted correctly.

My suspicion is that minted somehow/somewhere creates a new instance of the lexer that does not use the new get_tokens function but a different one. Any idea what might be going on?