lark-parser / lark

Lark is a parsing toolkit for Python, built with a focus on ergonomics, performance and modularity.
MIT License
4.77k stars 404 forks source link

Lark not calling imported transformer methods... which defeats the purpose of modularity / importing... #1274

Closed enjoysmath closed 1 year ago

enjoysmath commented 1 year ago

image

I have utility.py:

from lark import Transformer

class ParseTreeToUtility(Transformer):
   def permute3_213(self, t):
      return (t[1], t[0], t[2])

   def permute3_231(self, t):
      return (t[2], t[0], t[1])

   def permute3_321(self, t):
      return (t[2], t[1], t[0])

   def permute3_312(self, t):
      return (t[1], t[2], t[0])

   def permute3_321(self, t):
      return (t[2], t[1], t[0])

   def permute2_21(self, t):
      return (t[1], t[0])

variable.py:

from .lang import Lang
from lark import Transformer, Lark
from lark.visitors import merge_transformers
from .utility import ParseTreeToUtility
import os

class Variable(Lang):
   def __init__(self, base:str, sub:int=None, sup:int=None, primes:int=None):
      super().__init__()
      self._base = base
      self._sub = sub
      self._sup = sup
      self._primes = primes

   def __str__(self) -> str:
      out = self._base

      if len(out) > 1:
         if out in greek_variable_standard:
            out = greek_variable_standard[out]
         else:
            out = '\\' + out

      if self._sub is not None:
         sub = str(self._sub)
         if len(sub) > 1:
            sub = f'{{{sub}}}'
         out += f'_{sub}'

      if self._sup is not None:
         sup = str(self._sup)
         if len(sup) > 1:
            sup = f'{{{sup}}}'
         out += f'^_{sup}'

      if self._primes is not None:
         pr = "'" * self._primes
         if self._sup:
            out = f'{{{out}}}'
         out += pr

      return out

   def __hash__(self):
      return hash(str(self))

   def __eq__(self, x) -> bool:
      if not isinstance(x, Variable):
         return False
      return self._base == x._base and \
             self._sub == x._sub and \
             self._sup == x._sup and \
             self._primes == x._primes

   @staticmethod
   def default_parser(start:str=None):
      if start is None:
         start = 'var'      
      grammar_file = os.path.join(os.path.dirname(__file__), 'variable.lark')
      return Lark.open(grammar_file, start=start, lexer='contextual', parser='lalr')

   @staticmethod
   def default_transformer():
      transformer = merge_transformers(ParseTreeToVariable(), 
                                       imported=ParseTreeToUtility())
      return transformer

greek_variable_standard = {
   'alpha': 'α', 'beta': 'β', 'gamma': 'γ', 'delta': 'δ', 'epsilon' : 'ϵ', 'zeta': 'ζ', 'eta' : 'η',
    'theta': 'θ', 'iota': 'ι', 'kappa': 'κ', 'lambda' : 'λ', 'mu': 'μ', 'nu': 'ν', 'xi': 'ξ', 'pi': 'π', 
   'rho': 'ρ', 'sigma': 'σ', 'tau': 'τ', 'upsilon': 'υ', 'phi': 'ϕ', 'chi': 'χ', 'psi': 'ψ', 'omega': 'ω', 
   'varepsilon':'ε', 'vartheta': 'ϑ', 'varpi': 'ϖ', 'varrho': 'ϱ', 'varsigma': 'ς', 'varphi': 'φ', 
   'digamma': 'ϝ', 'Alpha': 'A', 'Beta': 'B', 'Gamma': 'Γ', 'Delta': 'Δ', 'Epsilon': 'E', 'Zeta': 'Z', 
   'Eta': 'H', 'Theta': 'Θ', 'Iota': 'I', 'Kappa': 'K', 'Lambda': 'Λ', 'Mu': 'M', 'Nu': 'N', 'Xi': 'Ξ', 
   'Omega': 'O', 'Pi': 'Π', 'Sigma': 'Σ', 'Tau': 'T', 'Upsilon': 'Υ', 'Phi': 'Φ', 'Chi': 'X', 
   'Psi': 'Ψ', 'Omega':  'Ω'
}

class ParseTreeToVariable(Transformer):
   def greek_letter(self, t):
      return str(t[0])

   def greek_alpha(self, t):
      return t[0]

   def var_base(self, t):
      return str(t[0])

   def var_base_only(self, t):
      return Variable(t[0])

   def QUOTE(self, t):
      return str(t)

   def quote_primes(self, t):
      return len(t)

   def var_base_primes(self, t):
      return Variable(t[0], primes=t[1])

   def DIGIT(self, t):
      return str(t)

   def subscript(self, t):
      return t[0]

   def supscript(self, t):
      return t[0]

   def var_base_sub_sup_primes(self, t):
      print(t)
      return t

utility.lark:


permute3{a1,a2,a3}: a1 a2 a3 
                  | a2 a1 a3     -> permute3_213
                  | a2 a3 a1     -> permute3_231 
                  | a3 a2 a1     -> permute3_321 
                  | a3 a1 a2     -> permute3_312
                  | a1 a3 a2     -> permute3_132

permute2{a1, a2}: a1 a2       
                | a2 a1          -> permute2_21

and finally variable.lark:

var: var_base permute3{subscript, supscript, primes}  -> var_base_sub_sup_primes
   | var_base permute2{subscript, supscript}          -> var_base_sub_sup
   | var_base permute2{subscript, primes}             -> var_base_sup_primes
   | var_base permute2{supscript, primes}             -> var_base_sub_primes
   | var_base subscript                               -> var_base_supscript
   | var_base supscript                               -> var_base_subscript
   | var_base primes                                  -> var_base_primes
   | var_base                                         -> var_base_only

!primes: "'"+                           -> quote_primes
       | "^" "{" "\\prime" "}"          -> sup_primes

subscript: "_" DIGIT
         | "_" "{" SIGNED_INT "}"

supscript: "^" DIGIT
         | "^" "{" SIGNED_INT "}"

var_base: greek_alpha
        | LATIN_ALPHA
        | misc_alpha

misc_alpha: "\\" misc_letter
misc_letter: "partial"

greek_alpha: "\\" greek_letter

!greek_letter: "Alpha" | "Beta" | "Gamma" | "Delta" | "Epsilon" | "Zeta" | "Eta" 
             | "Theta" | "Iota" | "Kappa" | "Lambda" | "Mu" | "Nu" | "Xi" | "Omicron" | "Pi" | "Rho"
             | "Sigma" | "Tau" | "Upsilon" | "Phi" | "Chi" | "Psi" | "Omega" | "varGamma" | "varDelta"
             | "varTheta" | "varLambda" | "varXi" | "varPi" | "varSigma" | "varUpsilon" | "varPhi"
             | "varPsi" | "varOmega" | "alpha" | "beta" | "gamma" | "delta" | "epsilon" | "zeta" | "eta"
             | "theta" | "iota" | "kappa" | "lambda" | "mu" | "nu" | "xi" | "omicron" | "pi" | "rho" 
             | "sigma" | "tau" | "upsilon" | "phi" | "chi" | "psi" | "omega" | "varepsilon" | "varkappa"
             | "vartheta" | "pi" | "rho" | "sigma" | "phi" | "digamma" | "thetasym"

LATIN_ALPHA: /[a-zA-Z]/

%ignore WS

%import common.WS
%import common.DIGIT
%import common.SIGNED_INT

%import .utility.permute2
%import .utility.permute3

Expected behavior:

That permute3_312 gets called first so that the proper debug output would be ['1', '2', 2] and not [2, '1', '2']

The point of the permutation utilities was to make it easy to try every "permutation" of grammar vars/terms A, B, C. Now I can't do that, because the ParseTreeToUtility.permute3_312() method is never appropriately called.

That is a bug for me.

enjoysmath commented 1 year ago

Fixed it! Apparently, the documentation is half-right based on the fact that the example has imported="blah" while the **kwargs description says to use the name_prefix. So look here:

image

I changed imported= to "utility=" and it fucking works now!!!! I'm so pleased 👍 :)

erezsh commented 1 year ago

That's great! If you can think of ways to improve the documentation, please submit a PR! (or even just list them in a new issue)